{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Set the LLM endpoint and API key as environment variables for this kernel.\n",
    "# The key value below is a placeholder (i.e. replace it with your own Qwen API key);\n",
    "# fill in a real key locally and do not commit it.\n",
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=sk-替换为自己的Qwen API_KEY"
   ],
   "id": "7d40e87296b62df7"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "af375836-b870-458b-87d1-4e00565977eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:40.186453Z",
     "iopub.status.busy": "2024-12-04T14:00:40.185220Z",
     "iopub.status.idle": "2024-12-04T14:00:40.194088Z",
     "shell.execute_reply": "2024-12-04T14:00:40.193180Z",
     "shell.execute_reply.started": "2024-12-04T14:00:40.186399Z"
    },
    "papermill": {
     "duration": 0.115454,
     "end_time": "2024-11-23T14:29:00.919641",
     "exception": false,
     "start_time": "2024-11-23T14:29:00.804187",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture --no-stderr\n",
    "# `shutil` ships with the Python standard library, so it must not be passed to pip.\n",
    "# `openpyxl` is needed by `pd.read_excel` to read the .xlsx question/answer file.\n",
    "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
    "%pip install -U langchain langchain_community langchain_openai pypdf sentence_transformers chromadb openpyxl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e2c72b8-ee12-4130-af88-699998aa230c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:40.195294Z",
     "iopub.status.busy": "2024-12-04T14:00:40.195017Z",
     "iopub.status.idle": "2024-12-04T14:00:40.424797Z",
     "shell.execute_reply": "2024-12-04T14:00:40.424295Z",
     "shell.execute_reply.started": "2024-12-04T14:00:40.195267Z"
    },
    "papermill": {
     "duration": 0.319981,
     "end_time": "2024-11-23T14:29:01.380771",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.060790",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "841d2b02-ad06-40d2-b11f-c7adccec6ca2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:40.425475Z",
     "iopub.status.busy": "2024-12-04T14:00:40.425305Z",
     "iopub.status.idle": "2024-12-04T14:00:40.428037Z",
     "shell.execute_reply": "2024-12-04T14:00:40.427659Z",
     "shell.execute_reply.started": "2024-12-04T14:00:40.425461Z"
    },
    "papermill": {
     "duration": 0.121409,
     "end_time": "2024-11-23T14:29:01.638126",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.516717",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Name of this experiment; used as the sub-directory that holds its artifacts.\n",
    "expr_version = 'split_01_3_markdown_header_text_split_v2'\n",
    "\n",
    "# Both paths are relative to the parent of the current working directory.\n",
    "# preprocess_output_dir: the cached PDF chunks and the QA spreadsheet are read from here.\n",
    "preprocess_output_dir = os.path.join(os.path.pardir, 'outputs', 'v1_20240713')\n",
    "# expr_dir: the Chroma vector stores of this experiment are written below this directory.\n",
    "expr_dir = os.path.join(os.path.pardir, 'experiments', expr_version)\n",
    "\n",
    "# Create the experiment directory up front; this is a no-op when it already exists.\n",
    "os.makedirs(expr_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf7e81e3-4c82-4842-aef5-7592caaf1d39",
   "metadata": {
    "papermill": {
     "duration": 0.100379,
     "end_time": "2024-11-23T14:29:01.862379",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.762000",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 读取文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e6920e29-bc7d-4635-be06-d151eaf0e100",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:40.429201Z",
     "iopub.status.busy": "2024-12-04T14:00:40.429052Z",
     "iopub.status.idle": "2024-12-04T14:00:42.199742Z",
     "shell.execute_reply": "2024-12-04T14:00:42.199256Z",
     "shell.execute_reply.started": "2024-12-04T14:00:40.429189Z"
    },
    "papermill": {
     "duration": 2.012298,
     "end_time": "2024-11-23T14:29:03.974974",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.962676",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "# Load the report PDF through PyPDFLoader.\n",
    "loader = PyPDFLoader(os.path.join(os.path.pardir, 'data', '2024全球经济金融展望报告.pdf'))\n",
    "\n",
    "pdf_documents = loader.load()\n",
    "\n",
    "# Read the Markdown version of the same report as a single string.\n",
    "# The text is Chinese, so decode it explicitly as UTF-8 instead of relying on the\n",
    "# platform default encoding, and use a context manager so the file handle is closed.\n",
    "markdown_path = os.path.join(os.path.pardir, 'outputs', 'MinerU_parsed_20241204', '2024全球经济金融展望报告.md')\n",
    "with open(markdown_path, encoding='utf-8') as markdown_file:\n",
    "    markdown_documents = markdown_file.read()\n",
    "\n",
    "# Question/answer table; later cells filter it down to the evaluation questions.\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "841ec659-4ad7-4e1f-b1ea-3477bf97fde3",
   "metadata": {
    "papermill": {
     "duration": 0.100297,
     "end_time": "2024-11-23T14:29:04.219302",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.119005",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 文档切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "74fe856a-7c19-4c3c-bb30-7abfa6298f74",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.200411Z",
     "iopub.status.busy": "2024-12-04T14:00:42.200247Z",
     "iopub.status.idle": "2024-12-04T14:00:42.208359Z",
     "shell.execute_reply": "2024-12-04T14:00:42.207992Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.200398Z"
    },
    "papermill": {
     "duration": 0.109229,
     "end_time": "2024-11-23T14:29:04.429069",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.319840",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
    "from uuid import uuid4\n",
    "\n",
    "def split_pdf_docs(documents, filepath, chunk_size=400, chunk_overlap=40, seperators=['\\n\\n\\n', '\\n\\n'], force_split=False):\n",
    "    \"\"\"Split documents into chunks and cache the result as a pickle file.\n",
    "\n",
    "    Args:\n",
    "        documents: Documents handed to ``RecursiveCharacterTextSplitter.split_documents``.\n",
    "        filepath: Location of the pickle cache. If it exists and ``force_split`` is\n",
    "            false, its content is returned without splitting anything.\n",
    "        chunk_size: Forwarded to the splitter.\n",
    "        chunk_overlap: Forwarded to the splitter.\n",
    "        seperators: Forwarded to the splitter as ``separators`` (the parameter name\n",
    "            keeps its original spelling so existing keyword callers still work).\n",
    "        force_split: When true, ignore an existing cache and overwrite it.\n",
    "\n",
    "    Returns:\n",
    "        The list of chunks. Freshly split chunks get a random ``uuid`` entry in\n",
    "        their metadata.\n",
    "\n",
    "    Notes:\n",
    "        * A cache hit is returned as-is; it is NOT checked against ``chunk_size``,\n",
    "          ``chunk_overlap`` or ``seperators``.\n",
    "        * Re-splitting assigns new random UUIDs, so anything keyed on the previously\n",
    "          cached UUIDs will no longer match.\n",
    "    \"\"\"\n",
    "    if os.path.exists(filepath) and not force_split:\n",
    "        print('found cache, restoring...')\n",
    "        # SECURITY: unpickling can execute arbitrary code; only load caches you created.\n",
    "        with open(filepath, 'rb') as cache_file:\n",
    "            return pickle.load(cache_file)\n",
    "\n",
    "    splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=chunk_size,\n",
    "        chunk_overlap=chunk_overlap,\n",
    "        separators=seperators\n",
    "    )\n",
    "    split_docs = splitter.split_documents(documents)\n",
    "    for chunk in split_docs:\n",
    "        # Random identifier attached to every chunk.\n",
    "        chunk.metadata['uuid'] = str(uuid4())\n",
    "\n",
    "    # The context manager guarantees the cache is flushed and closed before returning.\n",
    "    with open(filepath, 'wb') as cache_file:\n",
    "        pickle.dump(split_docs, cache_file)\n",
    "\n",
    "    return split_docs\n",
    "\n",
    "def split_md_docs(markdown_document):\n",
    "    \"\"\"Split a Markdown string into sections at level 1-3 headings.\n",
    "\n",
    "    Args:\n",
    "        markdown_document: The full Markdown text.\n",
    "\n",
    "    Returns:\n",
    "        The result of ``MarkdownHeaderTextSplitter.split_text``; headings are\n",
    "        registered under the metadata keys 'Header 1', 'Header 2' and 'Header 3'.\n",
    "    \"\"\"\n",
    "    # ('#', 'Header 1'), ('##', 'Header 2'), ('###', 'Header 3')\n",
    "    header_levels = [('#' * depth, f'Header {depth}') for depth in (1, 2, 3)]\n",
    "    return MarkdownHeaderTextSplitter(header_levels).split_text(markdown_document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "aa25540d-0504-4ae7-9804-9e3862b132d5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.208915Z",
     "iopub.status.busy": "2024-12-04T14:00:42.208799Z",
     "iopub.status.idle": "2024-12-04T14:00:42.221184Z",
     "shell.execute_reply": "2024-12-04T14:00:42.220779Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.208903Z"
    },
    "papermill": {
     "duration": 0.145583,
     "end_time": "2024-11-23T14:29:04.677429",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.531846",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "found cache, restoring...\n"
     ]
    }
   ],
   "source": [
    "# PDF chunks: restored from the pickle cache when it exists (chunk_size/chunk_overlap\n",
    "# are then not applied); otherwise split and cached.\n",
    "pdf_splitted_docs = split_pdf_docs(pdf_documents, os.path.join(preprocess_output_dir, 'split_docs.pkl'), chunk_size=500, chunk_overlap=50)\n",
    "# Markdown sections: split on headings.\n",
    "md_splitted_docs = split_md_docs(markdown_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "28d8135e-3fda-4c3b-9c69-059a2f014219",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.221747Z",
     "iopub.status.busy": "2024-12-04T14:00:42.221630Z",
     "iopub.status.idle": "2024-12-04T14:00:42.226431Z",
     "shell.execute_reply": "2024-12-04T14:00:42.226019Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.221735Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of PDF chunks.\n",
    "len(pdf_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c015e2ab-c5f6-4621-ba2a-9c7f26d887ae",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.227033Z",
     "iopub.status.busy": "2024-12-04T14:00:42.226876Z",
     "iopub.status.idle": "2024-12-04T14:00:42.232045Z",
     "shell.execute_reply": "2024-12-04T14:00:42.231742Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.227021Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='研究院\\n全球经济金融展望报告\\n要点2024年年报（总第57期） 报告日期：2023年12月12日\\n●2023年全球经济增长动力持续回落，各国复苏分化，\\n发达经济体增速明显放缓，新兴经济体整体表现稳定。\\n全球贸易增长乏力，各国生产景气度逐渐回落，内需\\n对经济的拉动作用减弱。欧美央行货币政策紧缩态势\\n放缓，美元指数高位震荡后走弱，全球股市表现总体\\n好于预期，但区域分化明显。高利率环境抑制债券融\\n资需求，债券违约风险持续上升。\\n●展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策走势将进一步分化。欧美\\n央行大概率结束本轮紧缩货币周期，美元指数将逐步\\n走弱，流向新兴经济体的跨境资本将增加。国际原油\\n市场短缺格局或延续，新能源发展成为重点。\\n●海湾六国经济发展与投资前景、高利率和高债务对\\n美国房地产市场脆弱性的影响等热点问题值得关注。中国银行研究院\\n全球经济金融研究课题组\\n组长：陈卫东\\n副组长：钟红\\n廖淑萍\\n成员：边卫红\\n熊启跃\\n王有鑫\\n曹鸿宇\\n李颖婷\\n王宁远\\n初晓\\n章凯莉\\n黄小军（纽约）\\n陆晓明（纽约）\\n黄承煜（纽约）\\n宋达志（伦敦）\\n李振龙（伦敦）\\n张传捷（伦敦）\\n刘冰彦（法兰克福）\\n温颍坤（法兰克福）\\n张明捷（法兰克福）\\n王哲（东京）\\n李彧（香港）\\n黎永康（香港）\\n联系人：王有鑫\\n电话：010-66594127\\n邮件：wangyouxin_hq@bank-of-china.com主要经济体GDP增速变化趋势（%）\\n资料来源：IMF，中国银行研究院', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 0, 'uuid': 'e73a0c9d-d42b-4350-a4c3-b38bf67c68a5'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 1 2024年\\n全球经济复苏疲软，货币政策取向分化\\n——中国银行全球经济金融展望报告（2024年）\\n2023年，全球经济增长动力持续回落。分区域看，各国复苏存在较大差异，\\n发达经济体增速明显放缓，新兴经济体增速与2022年大致持平。生产端，全球\\n供应链持续恢复，但生产景气度逐渐回落。需求端，内需对经济的拉动作用逐\\n渐减弱，各国国内投资和跨境投资均持续承压；全球货物贸易量指数和价格指\\n数下行，主要经济体出口贸易同比增速下降。欧美央行货币政策延续收紧态势，\\n但步伐整体放缓；金融体系短期资金运行发生结构性变化，“去存款化”特征\\n突出。美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化显\\n著。高利率环境抑制债券融资需求，债券违约风险持续上升，美国政府债务可\\n持续性问题引发市场关注。展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策将进一步分化。欧美央行大概率结束本轮加息周期，\\n日本央行可能退出负利率政策，跨境资本回流美国趋势将放缓，流向新兴经济\\n体的资金将增加。美元指数将逐步走弱，新兴经济体货币汇率有望回升。国际\\n原油市场短缺格局或延续，新能源发展成为重点。本期报告分别对海湾六国经\\n济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响两个专题\\n展开分析。\\n一、全球经济回顾与展望\\n（一）全球经济将在波动分化中筑底复苏\\n2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政\\n治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023\\n年全球GDP增速为2.7%（市场汇率法），较2022年下降0.3个百分点。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 2, 'uuid': '41d95288-441d-4c02-948a-6a3f0f4ef3ba'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 2 2024年\\n图1：全球GDP增速（%）\\n资料来源：IMF，中国银行研究院\\n分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明\\n显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经\\n济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英\\n国GDP环比增速均由之前的正增长转为负增长，分别下降0.1%和0.03%；美\\n国GDP环比增长折年率为4.9%，比二季度增速高2.8个百分点。新兴经济体增\\n速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，\\n东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中\\n东欧国家经济增速加快（图2）。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 3, 'uuid': '1f406690-b478-43cd-96f8-cd77924e300e'})]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first three PDF chunks.\n",
    "pdf_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "414feddc-648f-444b-9988-224e6e6b2fb1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.232712Z",
     "iopub.status.busy": "2024-12-04T14:00:42.232561Z",
     "iopub.status.idle": "2024-12-04T14:00:42.238181Z",
     "shell.execute_reply": "2024-12-04T14:00:42.237752Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.232700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "43"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of sections produced by the Markdown heading split.\n",
    "len(md_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "543f6f4e-28c1-4238-ae99-9abab95c2318",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.238916Z",
     "iopub.status.busy": "2024-12-04T14:00:42.238592Z",
     "iopub.status.idle": "2024-12-04T14:00:42.244293Z",
     "shell.execute_reply": "2024-12-04T14:00:42.243843Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.238904Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'Header 1': '全球经济金融展望报告'}, page_content='2024年年报（总第57期）  \\n报告日期：2023年12月12日'),\n",
       " Document(metadata={'Header 1': '要点'}, page_content='●2023 年全球经济增长动力持续回落，各国复苏分化，发达经济体增速明显放缓，新兴经济体整体表现稳定。全球贸易增长乏力，各国生产景气度逐渐回落，内需对经济的拉动作用减弱。欧美央行货币政策紧缩态势放缓，美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化明显。高利率环境抑制债券融资需求，债券违约风险持续上升。  \\n$\\\\bullet$ 展望2024 年，预计全球经济复苏将依旧疲软，主要经济体增长态势和货币政策走势将进一步分化。欧美央行大概率结束本轮紧缩货币周期，美元指数将逐步走弱，流向新兴经济体的跨境资本将增加。国际原油市场短缺格局或延续，新能源发展成为重点。  \\n$\\\\bullet$ 海湾六国经济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响等热点问题值得关注。  \\n![](images/c7e6ce1606712e84e07a05bcf6016906efa3fc778e40fcd0e91ac4fcb5503b79.jpg)\\n主要经济体GDP 增速变化趋势（%）\\n资料来源：IMF，中国银行研究院'),\n",
       " Document(metadata={'Header 1': '中国银行研究院全球经济金融研究课题组'}, page_content='![](images/a5d0eb181c75231451c8f890ec50fe5822e2306a9beb543ca35a04880abbf639.jpg)  \\n联系人：王有鑫\\n电话：010-66594127\\n邮件： wangyouxin_hq@bank-of-china.com')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first three Markdown sections.\n",
    "md_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4fb9cf39-1221-4b46-ab92-b300dc261c8e",
   "metadata": {},
   "source": [
    "## 检查一下切分后的块长度分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c76b31aa-28af-430b-a62c-8879905176b7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.245088Z",
     "iopub.status.busy": "2024-12-04T14:00:42.244764Z",
     "iopub.status.idle": "2024-12-04T14:00:42.252137Z",
     "shell.execute_reply": "2024-12-04T14:00:42.251801Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.245075Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      52.000000\n",
       "mean      623.307692\n",
       "std       258.763920\n",
       "min        65.000000\n",
       "25%       476.750000\n",
       "50%       618.000000\n",
       "75%       801.250000\n",
       "max      1306.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of chunk length (in characters) for the PDF chunks.\n",
    "pd.Series([len(d.page_content) for d in pdf_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "91e17fe4-4ef8-4768-932e-ed9cfb76eef6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.252696Z",
     "iopub.status.busy": "2024-12-04T14:00:42.252568Z",
     "iopub.status.idle": "2024-12-04T14:00:42.259676Z",
     "shell.execute_reply": "2024-12-04T14:00:42.259238Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.252684Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      43.000000\n",
       "mean      749.395349\n",
       "std       673.945036\n",
       "min        33.000000\n",
       "25%       241.000000\n",
       "50%       462.000000\n",
       "75%      1075.500000\n",
       "max      2839.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of section length (in characters) for the Markdown sections.\n",
    "pd.Series([len(d.page_content) for d in md_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7872f43-d308-4eed-9dc0-9ef73cd96ba9",
   "metadata": {},
   "source": [
    "## 检查超长块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a0370f48-6a02-4aac-a841-5a911182a4af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.261731Z",
     "iopub.status.busy": "2024-12-04T14:00:42.261596Z",
     "iopub.status.idle": "2024-12-04T14:00:42.265858Z",
     "shell.execute_reply": "2024-12-04T14:00:42.265417Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.261718Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023年全球GDP增速为 $2.7\\%$ （市场汇率法），较2022年下降0.3个百分点。  \n",
      "![](images/7600acb45b91442f8127f20629c791d91f04827835929cb12612c409fde82574.jpg)\n",
      "图1：全球GDP增速 $(\\%)$ ）  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英国GDP环比增速均由之前的正增长转为负增长，分别下降 $0.1\\%$ 和 $0.03\\%$ ；美国GDP环比增长折年率为 $4.9\\%$ ，比二季度增速高2.8个百分点。新兴经济体增速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中东欧国家经济增速加快（图2）。  \n",
      "![](images/abf30ccab508a0c4733d58e3810cda53dabdaeb4239acf37e57a931a0296d80c.jpg)\n",
      "图2：主要经济体GDP增速变化趋势（%）\n",
      "注：东盟五国包含印度尼西亚、马来西亚、菲律宾、新加坡和泰国。  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "从生产端看，全球供应链持续恢复，但生产景气度逐渐回落。截至2023年10月底，纽约联储全球供应链压力指数降至有记录以来的最低值。荷兰经济分析局数据显示，全球工业生产量于4月触及年内低位，5-8月逐月回升，但发达经济体和新兴经济体分化明显（图3）。其中，主要新兴经济体工业生产指数普遍走高，如俄罗斯、土耳其、南非等，而发达经济体中的美国和韩国回升，英国、德国、意大利下行，日本波动较大，整体趋于平稳。全球融资环境收紧和经济下行压力对工业生产前景带来较大影响，全球制造业PMI指数明显回落，从2月的 $49.9\\%$ 降至10月的 $48.8\\%$ 。  \n",
      "![](images/7d2b17776c10d8fc38a113a20b40791a9e65da33b4209516d0bde88163bee3ea.jpg)\n",
      "图3：部分经济体工业生产指数变化趋势（2010年 $\\mathbf{-100}\\rangle$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院  \n",
      "从需求端看，内需是支撑发达经济体增长的主要动力，但对经济的拉动作用逐渐减弱。美国消费未受加息明显影响，私人消费维持稳定增长，前三季度对美国经济增长的贡献率高达 $64.4\\%$ ；8-9月，美国零售和食品销售额连续两个月环比增速保持在 $0.7\\%$ 以上，高于市场预期，但10月增速大幅回落至 $-0.1\\%$ 。欧洲各国消费指数整体维持稳定（图4），是上半年免于陷入衰退的主要动力。但随着高利率和高通胀持续，对消费的影响逐渐释放，内需增长动力逐渐弱化，全球服务业PMI指数从二季度开始明显回落，从5月的 $55.5\\%$ 降至10月的 $50.4\\%$ 连续5个月下行；OECD消费者信心指数从7月开始连续3个月回落。  \n",
      "![](images/3876098e7c8b21ca208f46cd2b25aa420574a706ae2648c774fcf130fac892db.jpg)\n",
      "图4：部分欧洲国家零售销售指数\n",
      "注：除英国是以2019年为基年外，其他经济体均为2015年为基年。资料来源：Wind，中国银行研究院  \n",
      "发达经济体投资受加息政策影响较大，国内投资和跨境投资均持续承压。美国私人投资在2023年一季度触底后逐渐反弹，三季度存货及住宅投资恢复增长，带动私人投资增速提升至 $8.4\\%$ （经季调后环比折年率），但制造业和设备投资均放缓，环比增长折年率分别降低 $0.1\\%$ 和 $3.8\\%$ 。欧盟投资增速放缓，房地产投资减少。2023年二季度，欧元区固定资本形成总额环比增长 $0.1\\%$ ，比一季度增速下降0.3个百分点，房地产对GDP环比增长拉动率转为负值。在紧缩货币政策影响下，发达经济体企业部门宏观杠杆率下降，企业加杠杆或负债投资意愿不足。同2022年底相比，2023年二季度，美国、英国、法国、意大利和德国非金融企业部门负债率分别下降了2.4个、3.4个、4.0个、3.0个和1.3个百分点（图5）。IMF预测2023年全球投资率（投资占GDP的比重）将下降1.0个百分点至 $26.4\\%$ （图6），其中，欧盟将下降1.1个百分点，比发达经济体平均降幅高0.2个百分点。从跨境投资角度看，受地缘政治局势紧张、金融领域动荡加剧、高利率和投资审查趋严等影响，并购交易仍然疲软，而在全球产业链重塑背景下，东南亚等区域绿地投资恢复增长。联合国贸发会议预计2023年全球跨境直接投资将继续下行，但降幅较2022年收窄。  \n",
      "![](images/876898312b7f8b55b06bc9b09f7a585aebeb663d67fe281dfb4ed939588a8d6e.jpg)\n",
      "图5：部分发达国家非金融企业部门债务率（%）\n",
      "资料来源：IIF，中国银行研究院\n",
      "图6：全球投资率变化趋势 $(\\,\\%)$ ）  \n",
      "![](images/5c049cdfb254bf5b7720e25c09f7e2e434c5c77b74897dfb28ee5154f4ef318e.jpg)\n",
      "资料来源：IMF，中国银行研究院  \n",
      "从国际贸易角度看，全球货物贸易量和价格指数均承压下行，主要经济体出口贸易同比增速下降。荷兰经济分析局数据显示，2023年1-8月，全球货物贸易量指数和价格指数均震荡下行，8月数值比1月分别下降0.9个和4.3个点（图7）。10月，世贸组织将2023年全球货物贸易增速预测值下调0.9个百分点至 $0.8\\%$ ，2023年国际贸易增长或为近几年最低水平。但近期东亚、东南亚等主要经济体出口下行趋势收窄，贸易呈现企稳迹象。9-10月，越南出口结束连续10个月的负增长态势，同比分别增长 $5.0\\%$ 和 $6.7\\%$ 。10月，韩国出口同比增长 $5.1\\%$ ，是自2022年10月以来首次正增长。  \n",
      "![](images/5826ae44f43ef12c95089d898a8b9375a7e989c7ba7a6de6388cbdd174b65516.jpg)\n",
      "图7：全球货物贸易量指数和货物贸易价格指数（2010年 $\\mathbf{-100}.$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院' metadata={'Header 1': '（一）全球经济将在波动分化中筑底复苏'}\n"
     ]
    }
   ],
   "source": [
    "# Print the first Markdown section longer than 2000 characters, then stop.\n",
    "for d in md_splitted_docs:\n",
    "    if len(d.page_content) > 2000:\n",
    "        print(d)\n",
    "        break"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ec5e814-0a7e-4910-b3d1-e56343aded72",
   "metadata": {},
   "source": [
    "考虑到有不少切片依然非常大，此处对较大的片段做二次切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "44de0a4c-df8f-4cd2-84ab-32d7c56df772",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.266434Z",
     "iopub.status.busy": "2024-12-04T14:00:42.266306Z",
     "iopub.status.idle": "2024-12-04T14:00:42.273701Z",
     "shell.execute_reply": "2024-12-04T14:00:42.273238Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.266421Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain.text_splitter import MarkdownTextSplitter\n",
    "\n",
    "# Second pass: sections longer than 700 characters are re-split with\n",
    "# MarkdownTextSplitter (chunk_size=500, chunk_overlap=50); shorter ones are kept unchanged.\n",
    "new_md_splitted_docs = []\n",
    "splitter = MarkdownTextSplitter(\n",
    "    chunk_size=500,\n",
    "    chunk_overlap=50\n",
    ")\n",
    "for doc in md_splitted_docs:\n",
    "    if len(doc.page_content) > 700:\n",
    "        # One oversized section becomes several smaller chunks.\n",
    "        small_chunks = splitter.split_documents([doc])\n",
    "        new_md_splitted_docs.extend(small_chunks)\n",
    "    else:\n",
    "        # Small enough already: keep the original section object.\n",
    "        new_md_splitted_docs.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d1ef80c2-b46c-4a4c-abcb-8e1c9dfc836e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.274299Z",
     "iopub.status.busy": "2024-12-04T14:00:42.274169Z",
     "iopub.status.idle": "2024-12-04T14:00:42.280683Z",
     "shell.execute_reply": "2024-12-04T14:00:42.279964Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.274286Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "96"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(new_md_splitted_docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "220dbc3a-fceb-4e49-a3f1-01e16660b2a6",
   "metadata": {
    "papermill": {
     "duration": 0.100209,
     "end_time": "2024-11-23T14:29:05.255871",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.155662",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8598a11c-25d8-4af1-a98b-06a8c394e261",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:42.281387Z",
     "iopub.status.busy": "2024-12-04T14:00:42.281214Z",
     "iopub.status.idle": "2024-12-04T14:00:43.197450Z",
     "shell.execute_reply": "2024-12-04T14:00:43.196978Z",
     "shell.execute_reply.started": "2024-12-04T14:00:42.281372Z"
    },
    "papermill": {
     "duration": 0.989203,
     "end_time": "2024-11-23T14:29:06.345534",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.356331",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device: cuda\n"
     ]
    }
   ],
   "source": [
    "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "import torch\n",
    "\n",
    "# Use CUDA when torch reports it as available, otherwise fall back to the CPU.\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "print(f'device: {device}')\n",
    "\n",
    "def get_embeddings(model_path):\n",
    "    \"\"\"Create a HuggingFaceBgeEmbeddings instance for the given model.\n",
    "\n",
    "    Args:\n",
    "        model_path: Passed to HuggingFaceBgeEmbeddings as ``model_name``.\n",
    "\n",
    "    Returns:\n",
    "        The embeddings object, configured to run on the module-level ``device``,\n",
    "        to normalize embeddings, and to use a fixed Chinese query instruction.\n",
    "    \"\"\"\n",
    "    model_options = {'device': device}\n",
    "    encode_options = {'normalize_embeddings': True}\n",
    "    return HuggingFaceBgeEmbeddings(\n",
    "        model_name=model_path,\n",
    "        model_kwargs=model_options,\n",
    "        encode_kwargs=encode_options,\n",
    "        query_instruction='为这个句子生成表示以用于检索相关文章：',\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f6f46c73-7369-448f-a89a-ed3d817cad47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:43.198328Z",
     "iopub.status.busy": "2024-12-04T14:00:43.197939Z",
     "iopub.status.idle": "2024-12-04T14:00:46.096948Z",
     "shell.execute_reply": "2024-12-04T14:00:46.096447Z",
     "shell.execute_reply.started": "2024-12-04T14:00:43.198314Z"
    },
    "papermill": {
     "duration": 83.983138,
     "end_time": "2024-11-23T14:35:06.117207",
     "exception": false,
     "start_time": "2024-11-23T14:33:42.134069",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Embedding model shared by the PDF and the Markdown vector stores built below.\n",
    "model_path = 'BAAI/bge-large-zh-v1.5'\n",
    "embeddings = get_embeddings(model_path)\n",
    "\n",
    "def get_vector_db(splitted_docs, embeddings, name):\n",
    "    \"\"\"Build a fresh Chroma vector store from the given chunks.\n",
    "\n",
    "    Args:\n",
    "        splitted_docs: Chunks to index.\n",
    "        embeddings: Embedding model handed to Chroma.\n",
    "        name: Sub-directory name of the store below ``<expr_dir>/chroma/bge``.\n",
    "\n",
    "    Returns:\n",
    "        The Chroma store created by ``Chroma.from_documents``.\n",
    "    \"\"\"\n",
    "    store_dir = os.path.join(expr_dir, 'chroma', 'bge', name)\n",
    "    # Remove any existing store directory first; a missing directory is not an error.\n",
    "    shutil.rmtree(store_dir, ignore_errors=True)\n",
    "    return Chroma.from_documents(\n",
    "        splitted_docs,\n",
    "        embedding=embeddings,\n",
    "        persist_directory=store_dir,\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3318f9bb-a7f8-4c44-bf8d-302b71dca44c",
   "metadata": {},
   "source": [
    "使用新的切分方式，每个切片的UUID跟原始切片不一致了，检索的Ground Truth丢失了，此处通过向量检索的方式，将原始的UUID复制到Markdown的切片上，方便后续排查检索问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "b48362cc-5776-4f1c-8feb-64b1a4a675e8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:00:46.097693Z",
     "iopub.status.busy": "2024-12-04T14:00:46.097438Z",
     "iopub.status.idle": "2024-12-04T14:01:03.036660Z",
     "shell.execute_reply": "2024-12-04T14:01:03.034313Z",
     "shell.execute_reply.started": "2024-12-04T14:00:46.097680Z"
    }
   },
   "outputs": [],
   "source": [
    "# Index the original PDF chunks; used to look up the closest PDF chunk for each Markdown chunk.\n",
    "pdf_vector_db = get_vector_db(pdf_splitted_docs, embeddings, 'pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "dabf2d44-5afa-41f4-bd6c-1cbaaf00e571",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:03.039991Z",
     "iopub.status.busy": "2024-12-04T14:01:03.039252Z",
     "iopub.status.idle": "2024-12-04T14:01:07.952519Z",
     "shell.execute_reply": "2024-12-04T14:01:07.952149Z",
     "shell.execute_reply.started": "2024-12-04T14:01:03.039920Z"
    }
   },
   "outputs": [],
   "source": [
    "# Use each new Markdown chunk as the query, find the most similar original PDF chunk,\n",
    "# and copy that chunk's UUID onto the Markdown chunk.\n",
    "for doc in new_md_splitted_docs:\n",
    "    query = doc.page_content\n",
    "    # Retrieve only the single most similar chunk; the result is a (Document, score) pair.\n",
    "    chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
    "    doc.metadata['uuid'] = chunk_score_pair[0].metadata['uuid']\n",
    "    # Keep the relevance score alongside the copied UUID.\n",
    "    doc.metadata['pdf_chunk_sim'] = chunk_score_pair[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a655299-15f5-44b1-925d-5137a1e1c881",
   "metadata": {},
   "source": [
    "chunk_score_pair的结构如下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "46817374-ceb2-486a-a7f2-240c2abc98f0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:07.953120Z",
     "iopub.status.busy": "2024-12-04T14:01:07.952987Z",
     "iopub.status.idle": "2024-12-04T14:01:07.956043Z",
     "shell.execute_reply": "2024-12-04T14:01:07.955745Z",
     "shell.execute_reply.started": "2024-12-04T14:01:07.953107Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Document(metadata={'page': 51, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': 'ebf0d999-59f6-4fd3-941e-05a7a60c255a'}, page_content='免责声明\\n本研究报告由中国银行研究院撰写，研究报告中所引用信息均来自公开资料。\\n本研究报告中包含的观点或估计仅代表作者迄今为止的判断，它们不一定反映中国银行的观点。中国\\n银行研究院可以不经通知加以改变，且没有对此报告更新、修正或修改的责任。\\n本研究报告内容及观点仅供参考，不构成任何投资建议。对于本报告所提供信息所导致的任何直接的\\n或者间接的投资盈亏后果不承担任何责任。\\n本研究报告版权仅为中国银行研究院所有，未经书面许可，任何机构和个人不得以任何形式翻版、复\\n制和发布。如引用发布，需注明出处为中国银行研究院，且不得对本报告进行有悖原意的引用、删节和修\\n改。中国银行研究院保留对任何侵权行为和有悖报告原意的引用行为进行追究的权利。'),\n",
       " 0.8547903630738032)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Value left over from the last iteration of the loop above: a (Document, relevance score) tuple.\n",
    "chunk_score_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "62560d74-7d90-4e69-ae43-162b248e1622",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:07.956564Z",
     "iopub.status.busy": "2024-12-04T14:01:07.956441Z",
     "iopub.status.idle": "2024-12-04T14:01:26.062854Z",
     "shell.execute_reply": "2024-12-04T14:01:26.061660Z",
     "shell.execute_reply.started": "2024-12-04T14:01:07.956552Z"
    }
   },
   "outputs": [],
   "source": [
    "# Index the re-split Markdown chunks; this store is the one evaluated below.\n",
    "md_vector_db = get_vector_db(new_md_splitted_docs, embeddings, 'md')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55d51ebc-b29d-45be-b8c7-1d5610b270b8",
   "metadata": {},
   "source": [
    "# 计算检索准确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "ad8ef473-7ad8-43d4-8b9a-9890cf3bf4c6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:26.064477Z",
     "iopub.status.busy": "2024-12-04T14:01:26.064113Z",
     "iopub.status.idle": "2024-12-04T14:01:26.073140Z",
     "shell.execute_reply": "2024-12-04T14:01:26.071378Z",
     "shell.execute_reply.started": "2024-12-04T14:01:26.064442Z"
    }
   },
   "outputs": [],
   "source": [
    "# Keep only rows whose dataset is 'test' and whose qa_type is 'detailed'.\n",
    "test_df = qa_df[(qa_df['dataset'] == 'test') & (qa_df['qa_type'] == 'detailed')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "070b78ef-3140-4e59-886c-09c5184a8ee9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:26.075459Z",
     "iopub.status.busy": "2024-12-04T14:01:26.074850Z",
     "iopub.status.idle": "2024-12-04T14:01:26.093320Z",
     "shell.execute_reply": "2024-12-04T14:01:26.091026Z",
     "shell.execute_reply.started": "2024-12-04T14:01:26.075378Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "93"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of evaluation questions.\n",
    "len(test_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "435148a0-b2b1-49fb-8eea-2ad117c0b9d4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:26.096998Z",
     "iopub.status.busy": "2024-12-04T14:01:26.096288Z",
     "iopub.status.idle": "2024-12-04T14:01:26.111570Z",
     "shell.execute_reply": "2024-12-04T14:01:26.109635Z",
     "shell.execute_reply.started": "2024-12-04T14:01:26.096930Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_hit_stat_df(vector_db, top_k_arr=list(range(1, 9))):\n",
    "    \"\"\"Record, per question and per top-k value, whether retrieval found the labelled chunk.\n",
    "\n",
    "    Questions and their expected chunk UUIDs are read from the module-level ``test_df``\n",
    "    (columns 'question' and 'uuid').\n",
    "\n",
    "    Args:\n",
    "        vector_db: Store queried through ``similarity_search(question, k=...)``; every\n",
    "            returned document must carry a 'uuid' metadata entry.\n",
    "        top_k_arr: The k values to evaluate.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame with one row per (top_k, question) and the columns 'question',\n",
    "        'top_k', 'hit' (1 if the expected UUID was among the retrieved ones, else 0)\n",
    "        and 'retrieved_chunks' (number of documents returned).\n",
    "    \"\"\"\n",
    "    records = []\n",
    "\n",
    "    for top_k in tqdm(top_k_arr):\n",
    "        for qa_row in test_df.to_dict('records'):\n",
    "            question_text = qa_row['question']\n",
    "            expected_uuid = qa_row['uuid']\n",
    "            hits = vector_db.similarity_search(question_text, k=top_k)\n",
    "            found_uuids = [hit.metadata['uuid'] for hit in hits]\n",
    "\n",
    "            records.append({\n",
    "                'question': question_text,\n",
    "                'top_k': top_k,\n",
    "                'hit': int(expected_uuid in found_uuids),\n",
    "                'retrieved_chunks': len(hits)\n",
    "            })\n",
    "\n",
    "    return pd.DataFrame(records)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "01e01af2-9f53-462a-bcb1-2864864e6488",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:26.114437Z",
     "iopub.status.busy": "2024-12-04T14:01:26.113734Z",
     "iopub.status.idle": "2024-12-04T14:01:45.854858Z",
     "shell.execute_reply": "2024-12-04T14:01:45.854510Z",
     "shell.execute_reply.started": "2024-12-04T14:01:26.114369Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd2a5e90871a4b56a38b3cf352cd77c1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "hit_stat_df = get_hit_stat_df(md_vector_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "de0c3de0-92b5-4804-a374-108984640cf8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:45.855575Z",
     "iopub.status.busy": "2024-12-04T14:01:45.855401Z",
     "iopub.status.idle": "2024-12-04T14:01:45.862194Z",
     "shell.execute_reply": "2024-12-04T14:01:45.861771Z",
     "shell.execute_reply.started": "2024-12-04T14:01:45.855562Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>top_k</th>\n",
       "      <th>hit_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.376344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.494624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.559140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.602151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0.698925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>0.752688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>0.752688</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   top_k  hit_rate\n",
       "0      1  0.376344\n",
       "1      2  0.494624\n",
       "2      3  0.559140\n",
       "3      4  0.602151\n",
       "4      5  0.666667\n",
       "5      6  0.698925\n",
       "6      7  0.752688\n",
       "7      8  0.752688"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hit_stat_df.groupby(['top_k'])['hit'].mean().reset_index().rename(columns={'hit': 'hit_rate'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "ccc0dca9-8ad6-4d0c-a6e1-8279babbdfbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:45.862777Z",
     "iopub.status.busy": "2024-12-04T14:01:45.862612Z",
     "iopub.status.idle": "2024-12-04T14:01:46.230987Z",
     "shell.execute_reply": "2024-12-04T14:01:46.230572Z",
     "shell.execute_reply.started": "2024-12-04T14:01:45.862765Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='top_k', ylabel='hit'>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmN0lEQVR4nO3df1iUdb7/8dcwyiCpmCGgRLJqK2IKLQSLntJTtBzz6uQ5ZzvUqWCn4lzfzWlp51tfJQuytLGtCK/ykjTJvrWutp1+7cnox3zD1o0Ww9zVrax2UygdkKtNlGrYZub7R1fTcgSDgrmHD8/Hdd3X1dzzuWfe93q1PZ25Z8YWCoVCAgAAMESM1QMAAAAMJuIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFFGWT1ApAWDQR06dEjjxo2TzWazehwAANAPoVBIx44d05QpUxQTc/LXZkZc3Bw6dEhpaWlWjwEAAL6F1tZWnX766SddM+LiZty4cZK+/B9n/PjxFk8DAAD6o7OzU2lpaeH/jp/MiIubr96KGj9+PHEDAMAw059LSrigGAAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUUZZPQAAAIPt7dX/z+oRBsWsFecPaP1tt902NINE2Hc9D165AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGCUqIibdevWKT09XXFxccrPz1dTU1OfaxcuXCibzXbCtnjx4ghODAAAopXlcbNt2za53W5VVVVp9+7dysrKUlFRkdrb23td/+STT+rw4cPhbd++fbLb7br00ksjPDkAAIhGlsdNdXW1ysrK5HQ6lZmZqdraWsXHx6uurq7X9RMnTlRKSkp4e+mllxQfH0/cAAAASRbHTXd3t5qbm1VYWBjeFxMTo8LCQjU2NvbrMTZt2qTLLrtMp5xySq/3+/1+dXZ29tgAAIC5LI2bjo4OBQIBJScn99ifnJwsn8/3jcc3NTVp3759uvbaa/tc4/F4lJCQEN7S0tK+89wAACB6jbJ6gO9i06ZNmjNnjvLy8vpcU1FRIbfbHb7d2dlJ4AAYMVZf+WOrRxgUKx57wuoRMIxYGjeJiYmy2+1qa2vrsb+trU0pKSknPbarq0tbt27V7bffftJ1DodDDofjO88KAACGB0vfloqNjVVOTo68Xm94XzAYlNfrVUFBwUmP/fWvfy2/368rr7xyqMcEAADDiOVvS7ndbpWWlio3N1d5eXmqqalRV1eXnE6nJKmkpESpqanyeDw9jtu0aZOWLFmi0047zYqxAQBAlLI8boqLi3XkyBFVVlbK5/MpOztb9fX14YuMW1paFBPT8wWm/fv3a+fOnXrxxRetGBkAAEQxy+NGklwul1wuV6/3NTQ0nLBv5syZCoVCQzwVAAAYjiz/Ej8AAIDBRNwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjBIV31AMAEPtgf/9G6tHGBSuey+2egQg6vHKDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjGJ53Kxbt07p6emKi4tTfn6+mpqaTrr+k08+0dKlSzV58mQ5HA59//vf1/bt2yM0LQAAiHajrHzy
bdu2ye12q7a2Vvn5+aqpqVFRUZH279+vpKSkE9Z3d3frwgsvVFJSkp544gmlpqbq4MGDmjBhQuSHBwAAUcnSuKmurlZZWZmcTqckqba2Vs8995zq6uq0fPnyE9bX1dXp448/1muvvabRo0dLktLT0yM5MgAAiHKWxU13d7eam5tVUVER3hcTE6PCwkI1Njb2esyzzz6rgoICLV26VM8884wmTZqk//iP/9CyZctkt9t7Pcbv98vv94dvd3Z2Du6JAMPMjvMWWD3CoFjw6g6rRwAQpSy75qajo0OBQEDJyck99icnJ8vn8/V6zF/+8hc98cQTCgQC2r59u2699Vbde++9WrVqVZ/P4/F4lJCQEN7S0tIG9TwAAEB0sfyC4oEIBoNKSkrShg0blJOTo+LiYq1YsUK1tbV9HlNRUaGjR4+Gt9bW1ghODAAAIs2yt6USExNlt9vV1tbWY39bW5tSUlJ6PWby5MkaPXp0j7egZs2aJZ/Pp+7ubsXGxp5wjMPhkMPhGNzhAQBA1LLslZvY2Fjl5OTI6/WG9wWDQXm9XhUUFPR6zPz58/X+++8rGAyG97377ruaPHlyr2EDAABGHkvflnK73dq4caMeeeQRvf322/rpT3+qrq6u8KenSkpKelxw/NOf/lQff/yxysvL9e677+q5557TnXfeqaVLl1p1CgAAIMpY+lHw4uJiHTlyRJWVlfL5fMrOzlZ9fX34IuOWlhbFxHzdX2lpaXrhhRf085//XHPnzlVqaqrKy8u1bNkyq04BAABEGUvjRpJcLpdcLlev9zU0NJywr6CgQK+//voQTwUAAIarYfVpKQAAgG9C3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADCK5T+cCVhl/v3zrR5hUPzu+t9ZPQIARBVeuQEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRoiJu1q1bp/T0dMXFxSk/P19NTU19rt28ebNsNluPLS4uLoLTAgCAaGZ53Gzbtk1ut1tVVVXavXu3srKyVFRUpPb29j6PGT9+vA4fPhzeDh48GMGJAQBANLM8bqqrq1VWVian06nMzEzV1tYqPj5edXV1fR5js9mUkpIS3pKTkyM4MQAAiGaWxk13d7eam5tVWFgY3hcTE6PCwkI1Njb2edzx48c1depUpaWl6ZJLLtGf/vSnSIwLAACGAUvjpqOjQ4FA4IRXXpKTk+Xz+Xo9ZubMmaqrq9Mzzzyjxx57TMFgUPPmzdOHH37Y63q/36/Ozs4eGwAAMJflb0sNVEFBgUpKSpSdna0FCxboySef1KRJk/Tggw/2ut7j8SghISG8paWlRXhiAAAQSZbGTWJioux2u9ra2nrsb2trU0pKSr8eY/To0Tr77LP1/vvv93p/RUWFjh49Gt5aW1u/89wAACB6jbLyyWNjY5WTkyOv16slS5ZIkoLBoLxer1wuV78eIxAIaO/evbrooot6vd/hcMjhcAzWyEZquX2O1SMMijMq91o9AgAgClgaN5LkdrtVWlqq3Nxc5eXlqaamRl1dXXI6nZKkkpISpaamyuPxSJJuv/12/fCHP9SMGTP0ySef6O6779bBgwd17bXXWnkaAAAgSlgeN8XFxTpy5IgqKyvl8/mUnZ2t+vr68EXGLS0tion5+t2zv/71ryorK5PP59Opp56qnJwcvfbaa8rMzLTqFAAAQBSxPG4kyeVy9fk2VENDQ4/b9913n+67774ITAUAAIajYfdpKQAAgJMhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsA
AGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABglKuJm3bp1Sk9PV1xcnPLz89XU1NSv47Zu3SqbzaYlS5YM7YAAAGDY+FZxc/755+uTTz45YX9nZ6fOP//8AT3Wtm3b5Ha7VVVVpd27dysrK0tFRUVqb28/6XEHDhzQjTfeqHPPPXdAzwcAAMz2reKmoaFB3d3dJ+z//PPP9dvf/nZAj1VdXa2ysjI5nU5lZmaqtrZW8fHxqqur6/OYQCCgK664QitXrtS0adMGPD8AADDXqIEs/uMf/xj+57feeks+ny98OxAIqL6+Xqmpqf1+vO7ubjU3N6uioiK8LyYmRoWFhWpsbOzzuNtvv11JSUm65pprBhxTAADAbAOKm+zsbNlsNtlstl7ffhozZozuv//+fj9eR0eHAoGAkpOTe+xPTk7WO++80+sxO3fu1KZNm7Rnz55+PYff75ff7w/f7uzs7Pd8AABg+BlQ3HzwwQcKhUKaNm2ampqaNGnSpPB9sbGxSkpKkt1uH/Qhv3Ls2DFdddVV2rhxoxITE/t1jMfj0cqVK4dsJgAAEF0GFDdTp06VJAWDwUF58sTERNntdrW1tfXY39bWppSUlBPW//nPf9aBAwd08cUXh/d9NcuoUaO0f/9+TZ8+vccxFRUVcrvd4dudnZ1KS0sblPkBAED06XfcPPvss1q0aJFGjx6tZ5999qRr//mf/7lfjxkbG6ucnBx5vd7wx7mDwaC8Xq9cLtcJ6zMyMrR3794e+2655RYdO3ZMa9eu7TVaHA6HHA5Hv+YBAADDX7/jZsmSJfL5fEpKSjrp98rYbDYFAoF+D+B2u1VaWqrc3Fzl5eWppqZGXV1dcjqdkqSSkhKlpqbK4/EoLi5OZ511Vo/jJ0yYIEkn7AcAACNTv+Pm79+KGqy3pSSpuLhYR44cUWVlpXw+n7Kzs1VfXx++yLilpUUxMVHxXYMAAGAYGNA1N3/P6/XK6/Wqvb29R+zYbDZt2rRpQI/lcrl6fRtK+vI7dU5m8+bNA3ouAABgtm8VNytXrtTtt9+u3NxcTZ48WTabbbDnAgAA+Fa+VdzU1tZq8+bNuuqqqwZ7HgAAgO/kW13M0t3drXnz5g32LAAAAN/Zt4qba6+9Vlu2bBnsWQAAAL6zfr8t9fdfhBcMBrVhwwa9/PLLmjt3rkaPHt1jbXV19eBNCAAAMAD9jps333yzx+3s7GxJ0r59+3rsH84XF+fc9H+tHmFQNN9dYvUIAABYpt9x88orrwzlHAAAAIOCb8cDAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEaJirhZt26d0tPTFRcXp/z8fDU1NfW59sknn1Rubq4mTJigU045RdnZ2Xr00UcjOC0AAIhmlsfNtm3b5Ha7VVVVpd27dysrK0tFRUVqb2/vdf3EiRO1YsUKNTY26o9//KOcTqecTqdeeOGFCE8OAACikeVxU11drbKyMjmdTmVmZqq2tlbx8fGqq6vrdf3ChQv1L//yL5o1a5amT5+u8vJyzZ07Vzt37ozw5AAAIBpZGjfd3d1qbm5WYWFheF9MTIwKCwvV2Nj4jceHQiF5vV7t379f5513Xq9r/H6/Ojs7e2wAAMBclsZNR0eHAoGAkpOTe+xPTk6Wz+fr87ijR49q7Nixio2N
1eLFi3X//ffrwgsv7HWtx+NRQkJCeEtLSxvUcwAAANHF8relvo1x48Zpz5492rVrl1avXi23262GhoZe11ZUVOjo0aPhrbW1NbLDAgCAiBpl5ZMnJibKbrerra2tx/62tjalpKT0eVxMTIxmzJghScrOztbbb78tj8ejhQsXnrDW4XDI4XAM6twAACB6WfrKTWxsrHJycuT1esP7gsGgvF6vCgoK+v04wWBQfr9/KEYEAADDjKWv3EiS2+1WaWmpcnNzlZeXp5qaGnV1dcnpdEqSSkpKlJqaKo/HI+nLa2hyc3M1ffp0+f1+bd++XY8++qjWr19v5WkAAIAoYXncFBcX68iRI6qsrJTP51N2drbq6+vDFxm3tLQoJubrF5i6urp03XXX6cMPP9SYMWOUkZGhxx57TMXFxVadAgAAiCKWx40kuVwuuVyuXu/7nxcKr1q1SqtWrYrAVAAAYDgalp+WAgAA6AtxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwChRETfr1q1Tenq64uLilJ+fr6ampj7Xbty4Ueeee65OPfVUnXrqqSosLDzpegAAMLJYHjfbtm2T2+1WVVWVdu/eraysLBUVFam9vb3X9Q0NDbr88sv1yiuvqLGxUWlpafrRj36kjz76KMKTAwCAaGR53FRXV6usrExOp1OZmZmqra1VfHy86urqel3/y1/+Utddd52ys7OVkZGhhx56SMFgUF6vN8KTAwCAaGRp3HR3d6u5uVmFhYXhfTExMSosLFRjY2O/HuPTTz/V3/72N02cOLHX+/1+vzo7O3tsAADAXJbGTUdHhwKBgJKTk3vsT05Ols/n69djLFu2TFOmTOkRSH/P4/EoISEhvKWlpX3nuQEAQPSy/G2p72LNmjXaunWrnnrqKcXFxfW6pqKiQkePHg1vra2tEZ4SAABE0igrnzwxMVF2u11tbW099re1tSklJeWkx95zzz1as2aNXn75Zc2dO7fPdQ6HQw6HY1DmBQAA0c/SV25iY2OVk5PT42Lgry4OLigo6PO4X/ziF7rjjjtUX1+v3NzcSIwKAACGCUtfuZEkt9ut0tJS5ebmKi8vTzU1Nerq6pLT6ZQklZSUKDU1VR6PR5J01113qbKyUlu2bFF6enr42pyxY8dq7Nixlp0HAACIDpbHTXFxsY4cOaLKykr5fD5lZ2ervr4+fJFxS0uLYmK+foFp/fr16u7u1o9//OMej1NVVaXbbrstkqMDAIAoZHncSJLL5ZLL5er1voaGhh63Dxw4MPQDAQCAYWtYf1oKAADgfyJuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTL42bdunVKT09XXFyc8vPz1dTU1OfaP/3pT/q3f/s3paeny2azqaamJnKDAgCAYcHSuNm2bZvcbreqqqq0e/duZWVlqaioSO3t7b2u//TTTzVt2jStWbNGKSkpEZ4WAAAMB5bGTXV1tcrKyuR0OpWZmana2lrFx8errq6u1/XnnHOO7r77bl122WVyOBwRnhYAAAwHlsVNd3e3mpub
VVhY+PUwMTEqLCxUY2OjVWMBAIBhbpRVT9zR0aFAIKDk5OQe+5OTk/XOO+8M2vP4/X75/f7w7c7OzkF7bAAAEH0sv6B4qHk8HiUkJIS3tLQ0q0cCAABDyLK4SUxMlN1uV1tbW4/9bW1tg3qxcEVFhY4ePRreWltbB+2xAQBA9LEsbmJjY5WTkyOv1xveFwwG5fV6VVBQMGjP43A4NH78+B4bAAAwl2XX3EiS2+1WaWmpcnNzlZeXp5qaGnV1dcnpdEqSSkpKlJqaKo/HI+nLi5Dfeuut8D9/9NFH2rNnj8aOHasZM2ZYdh4AACB6WBo3xcXFOnLkiCorK+Xz+ZSdna36+vrwRcYtLS2Kifn6xaVDhw7p7LPPDt++5557dM8992jBggVqaGiI9PgAACAKWRo3kuRyueRyuXq9738GS3p6ukKhUASmAgAAw5Xxn5YCAAAjC3EDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKFERN+vWrVN6erri4uKUn5+vpqamk67/9a9/rYyMDMXFxWnOnDnavn17hCYFAADRzvK42bZtm9xut6qqqrR7925lZWWpqKhI7e3tva5/7bXXdPnll+uaa67Rm2++qSVLlmjJkiXat29fhCcHAADRyPK4qa6uVllZmZxOpzIzM1VbW6v4+HjV1dX1un7t2rX6p3/6J910002aNWuW7rjjDv3gBz/QAw88EOHJAQBANLI0brq7u9Xc3KzCwsLwvpiYGBUWFqqxsbHXYxobG3usl6SioqI+1wMAgJFllJVP3tHRoUAgoOTk5B77k5OT9c477/R6jM/n63W9z+frdb3f75ff7w/fPnr0qCSps7PzhLUB/2cDmj9a9XZuJ3Ps88AQTRJZAz3vLz77YogmiayBnnfXFyPzvD/zfzpEk0TWQM/787/9bYgmiayBnvfxz7uGaJLIGuh5//1/74az3s77q32hUOgbj7c0biLB4/Fo5cqVJ+xPS0uzYJrISLj/f1k9gjU8CVZPYImEZSPzvJUwMs/7/6yzegJrrHp8ZP55a5XVA1hjzZo1fd537NgxJXzDv/+Wxk1iYqLsdrva2tp67G9ra1NKSkqvx6SkpAxofUVFhdxud/h2MBjUxx9/rNNOO002m+07nsHAdHZ2Ki0tTa2trRo/fnxEn9tKnDfnPRJw3pz3SGDleYdCIR07dkxTpkz5xrWWxk1sbKxycnLk9Xq1ZMkSSV/Gh9frlcvl6vWYgoICeb1e3XDDDeF9L730kgoKCnpd73A45HA4euybMGHCYIz/rY0fP35E/cvwFc57ZOG8RxbOe2Sx6ry/6RWbr1j+tpTb7VZpaalyc3OVl5enmpoadXV1yel0SpJKSkqUmpoqj8cjSSovL9eCBQt07733avHixdq6daveeOMNbdiwwcrTAAAAUcLyuCkuLtaRI0dUWVkpn8+n7Oxs1dfXhy8abmlpUUzM1x/qmjdvnrZs2aJbbrlFN998s84880w9/fTTOuuss6w6BQAAEEUsjxtJcrlcfb4N1dDQcMK+Sy+9VJdeeukQTzX4HA6HqqqqTnibzHScN+c9EnDenPdIMFzO2xbqz2eqAAAAhgnLv6EYAABgMBE3AADAKMQNAAAwCnETAa+++qouvvhiTZkyRTabTU8//bTVI0WEx+PROeeco3HjxikpKUlLlizR/v37rR5ryK1fv15z584Nfw9EQUGBnn/+eavHirg1a9bIZrP1+E4qE912222y2Ww9toyMDKvHioiPPvpIV155pU47
7TSNGTNGc+bM0RtvvGH1WEMqPT39hD9vm82mpUuXWj3akAoEArr11lv1ve99T2PGjNH06dN1xx139OunEKwQFZ+WMl1XV5eysrJ09dVX61//9V+tHididuzYoaVLl+qcc87RF198oZtvvlk/+tGP9NZbb+mUU06xerwhc/rpp2vNmjU688wzFQqF9Mgjj+iSSy7Rm2++qdmzZ1s9XkTs2rVLDz74oObOnWv1KBExe/Zsvfzyy+Hbo0aZ/3+tf/3rXzV//nz94z/+o55//nlNmjRJ7733nk499VSrRxtSu3btUiDw9e/x7du3TxdeeOGw/ATvQNx1111av369HnnkEc2ePVtvvPGGnE6nEhIS9LOf/czq8U5g/r+BUWDRokVatGiR1WNEXH19fY/bmzdvVlJSkpqbm3XeeedZNNXQu/jii3vcXr16tdavX6/XX399RMTN8ePHdcUVV2jjxo1atWpk/DDOqFGj+vwJGFPdddddSktL08MPPxze973vfc/CiSJj0qRJPW6vWbNG06dP14IFCyyaKDJee+01XXLJJVq8eLGkL1/B+tWvfqWmpiaLJ+sdb0shYr76RfaJEydaPEnkBAIBbd26VV1dXX3+RIhpli5dqsWLF6uwsNDqUSLmvffe05QpUzRt2jRdccUVamlpsXqkIffss88qNzdXl156qZKSknT22Wdr48aNVo8VUd3d3Xrsscd09dVXR/y3CiNt3rx58nq9evfddyVJf/jDH7Rz586o/Ys7r9wgIoLBoG644QbNnz9/RHyb9N69e1VQUKDPP/9cY8eO1VNPPaXMzEyrxxpyW7du1e7du7Vr1y6rR4mY/Px8bd68WTNnztThw4e1cuVKnXvuudq3b5/GjRtn9XhD5i9/+YvWr18vt9utm2++Wbt27dLPfvYzxcbGqrS01OrxIuLpp5/WJ598op/85CdWjzLkli9frs7OTmVkZMhutysQCGj16tW64oorrB6tV8QNImLp0qXat2+fdu7cafUoETFz5kzt2bNHR48e1RNPPKHS0lLt2LHD6MBpbW1VeXm5XnrpJcXFxVk9TsT8/d9c586dq/z8fE2dOlWPP/64rrnmGgsnG1rBYFC5ubm68847JUlnn3229u3bp9ra2hETN5s2bdKiRYv69SvVw93jjz+uX/7yl9qyZYtmz56tPXv26IYbbtCUKVOi8s+buMGQc7lc+u///m+9+uqrOv30060eJyJiY2M1Y8YMSVJOTo527dqltWvX6sEHH7R4sqHT3Nys9vZ2/eAHPwjvCwQCevXVV/XAAw/I7/fLbrdbOGFkTJgwQd///vf1/vvvWz3KkJo8efIJsT5r1iz913/9l0UTRdbBgwf18ssv68knn7R6lIi46aabtHz5cl122WWSpDlz5ujgwYPyeDzEDUaWUCik66+/Xk899ZQaGhpGxMWGfQkGg/L7/VaPMaQuuOAC7d27t8c+p9OpjIwMLVu2bESEjfTlBdV//vOfddVVV1k9ypCaP3/+CV/t8O6772rq1KkWTRRZDz/8sJKSksIX2Jru008/7fEj1pJkt9sVDAYtmujkiJsIOH78eI+/xX3wwQfas2ePJk6cqDPOOMPCyYbW0qVLtWXLFj3zzDMaN26cfD6fJCkhIUFjxoyxeLqhU1FRoUWLFumMM87QsWPHtGXLFjU0NOiFF16werQhNW7cuBOupzrllFN02mmnGX2d1Y033qiLL75YU6dO1aFDh1RVVSW73a7LL7/c6tGG1M9//nPNmzdPd955p/793/9dTU1N2rBhgzZs2GD1aEMuGAzq4YcfVmlp6Yj42L/05adAV69erTPOOEOzZ8/Wm2++qerqal199dVWj9a7EIbcK6+8EpJ0wlZaWmr1aEOqt3OWFHr44YetHm1IXX311aGpU6eGYmNjQ5MmTQpdcMEFoRdffNHqsSyxYMGCUHl5udVjDKni4uLQ5MmTQ7GxsaHU1NRQcXFx6P3337d6rIj4zW9+Ezrr
rLNCDocjlJGREdqwYYPVI0XECy+8EJIU2r9/v9WjRExnZ2eovLw8dMYZZ4Ti4uJC06ZNC61YsSLk9/utHq1X/Co4AAAwCt9zAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQNgREtPT1dNTY3VYwAYRMQNgKixcOFC3XDDDVaPAWCYI24AAIBRiBsAUeEnP/mJduzYobVr18pms8lms+nAgQPasWOH8vLy5HA4NHnyZC1fvlxffPFF+LiFCxfK5XLJ5XIpISFBiYmJuvXWW/VtfzbvoYce0oQJE+T1egfr1ABEGHEDICqsXbtWBQUFKisr0+HDh3X48GGNHj1aF110kc455xz94Q9/0Pr167Vp0yatWrWqx7GPPPKIRo0apaamJq1du1bV1dV66KGHBjzDL37xCy1fvlwvvviiLrjggsE6NQARNsrqAQBAkhISEhQbG6v4+HilpKRIklasWKG0tDQ98MADstlsysjI0KFDh7Rs2TJVVlYqJubLv5+lpaXpvvvuk81m08yZM7V3717dd999Kisr6/fzL1u2TI8++qh27Nih2bNnD8k5AogMXrkBELXefvttFRQUyGazhffNnz9fx48f14cffhje98Mf/rDHmoKCAr333nsKBAL9ep57771XGzdu1M6dOwkbwADEDYAR79xzz1UgENDjjz9u9SgABgFxAyBqxMbG9ni1ZdasWWpsbOxxcfDvfvc7jRs3Tqeffnp43+9///sej/P666/rzDPPlN1u79fz5uXl6fnnn9edd96pe+655zueBQCrETcAokZ6erp+//vf68CBA+ro6NB1112n1tZWXX/99XrnnXf0zDPPqKqqSm63O3y9jSS1tLTI7XZr//79+tWvfqX7779f5eXlA3ruefPmafv27Vq5ciVf6gcMc1xQDCBq3HjjjSotLVVmZqY+++wzffDBB9q+fbtuuukmZWVlaeLEibrmmmt0yy239DiupKREn332mfLy8mS321VeXq7//M//HPDz/8M//IOee+45XXTRRbLb7br++usH69QARJAt9G2/DAIAosDChQuVnZ3Nqy0AwnhbCgAAGIW4AWCs3/72txo7dmyfGwAz8bYUAGN99tln+uijj/q8f8aMGRGcBkCkEDcAAMAovC0FAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMMr/BywjKaNkUYJNAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "sns.barplot(x='top_k', y='hit', data=hit_stat_df, errorbar=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7925564a-7d30-4914-baaf-4a00abb7686d",
   "metadata": {
    "papermill": {
     "duration": 0.109216,
     "end_time": "2024-11-23T14:35:26.464009",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.354793",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 生成答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "27132c3b-0051-4df6-bf57-fd804acb8d17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:46.231773Z",
     "iopub.status.busy": "2024-12-04T14:01:46.231472Z",
     "iopub.status.idle": "2024-12-04T14:01:46.305942Z",
     "shell.execute_reply": "2024-12-04T14:01:46.305447Z",
     "shell.execute_reply.started": "2024-12-04T14:01:46.231758Z"
    },
    "papermill": {
     "duration": 0.199165,
     "end_time": "2024-11-23T14:35:27.323500",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.124335",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1231697/3342461511.py:3: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  ollama_llm = Ollama(\n"
     ]
    }
   ],
   "source": [
    "from langchain.llms import Ollama\n",
    "\n",
    "ollama_llm = Ollama(\n",
    "    model='qwen2:7b-instruct',\n",
    "    base_url='http://localhost:11434',\n",
    "    top_k=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "50404beb-3be0-4aaa-b124-8c7a52b84531",
   "metadata": {
    "editable": true,
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:46.306588Z",
     "iopub.status.busy": "2024-12-04T14:01:46.306449Z",
     "iopub.status.idle": "2024-12-04T14:01:46.310353Z",
     "shell.execute_reply": "2024-12-04T14:01:46.309998Z",
     "shell.execute_reply.started": "2024-12-04T14:01:46.306575Z"
    },
    "papermill": {
     "duration": 0.159318,
     "end_time": "2024-11-23T14:35:26.768506",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.609188",
     "status": "completed"
    },
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def rag(vector_db, llm, query, n_chunks=4):\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个金融分析师，擅长根据所获取的信息片段，对问题进行分析和推理。\n",
    "你的任务是根据所获取的信息片段（<<<<context>>><<<</context>>>之间的内容）回答问题。\n",
    "回答保持简洁，不必重复问题，不要添加描述性解释和与答案无关的任何内容。\n",
    "已知信息：\n",
    "<<<<context>>>\n",
    "{{knowledge}}\n",
    "<<<</context>>>\n",
    "\n",
    "问题：{{query}}\n",
    "请回答：\n",
    "\"\"\".strip()\n",
    "    chunks = vector_db.similarity_search(query, k=n_chunks)\n",
    "    prompt = prompt_tmpl.replace('{{knowledge}}', '\\n\\n'.join([doc.page_content for doc in chunks])).replace('{{query}}', query)\n",
    "    retry_count = 3\n",
    "\n",
    "    resp = ''\n",
    "    while retry_count > 0:\n",
    "        try:\n",
    "            resp = llm.invoke(prompt)\n",
    "            break\n",
    "        except Exception as e:\n",
    "            retry_count -= 1\n",
    "            sleeping_seconds = 2 ** (4 - retry_count)\n",
    "            print(f\"query={query}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "            \n",
    "            time.sleep(sleeping_seconds)\n",
    "    \n",
    "    return resp, chunks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95e5a804-2dc6-411c-ba71-6ccf765b2b73",
   "metadata": {
    "papermill": {
     "duration": 0.135973,
     "end_time": "2024-11-23T14:35:27.001401",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.865428",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "166392d8-f801-4372-b8ad-3e79aef0b350",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:46.311034Z",
     "iopub.status.busy": "2024-12-04T14:01:46.310851Z",
     "iopub.status.idle": "2024-12-04T14:01:46.317808Z",
     "shell.execute_reply": "2024-12-04T14:01:46.317403Z",
     "shell.execute_reply.started": "2024-12-04T14:01:46.311021Z"
    },
    "papermill": {
     "duration": 0.141864,
     "end_time": "2024-11-23T14:35:27.564409",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.422545",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "prediction_df = qa_df[qa_df['dataset'] == 'test'][['uuid', 'question', 'qa_type', 'answer']].rename(columns={'answer': 'ref_answer'})\n",
    "\n",
    "def predict(vector_db, llm, prediction_df, n_chunks):\n",
    "    prediction_df = prediction_df.copy()\n",
    "    answer_dict = {}\n",
    "\n",
    "    for idx, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        uuid = row['uuid']\n",
    "        question = row['question']\n",
    "        answer, chunks = rag(vector_db, llm, question, n_chunks=n_chunks)\n",
    "        assert len(chunks) <= n_chunks\n",
    "        answer_dict[question] = {\n",
    "            'uuid': uuid,\n",
    "            'ref_answer': row['ref_answer'],\n",
    "            'gen_answer': answer,\n",
    "            'chunks': chunks\n",
    "        }\n",
    "\n",
    "    prediction_df.loc[:, 'gen_answer'] = prediction_df['question'].apply(lambda q: answer_dict[q]['gen_answer'])\n",
    "    prediction_df.loc[:, 'chunks'] = prediction_df['question'].apply(lambda q: answer_dict[q]['chunks'])\n",
    "\n",
    "    return prediction_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ca46d5f1-e698-457d-abb6-92d83cd59c66",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:01:46.318513Z",
     "iopub.status.busy": "2024-12-04T14:01:46.318345Z",
     "iopub.status.idle": "2024-12-04T14:04:27.595375Z",
     "shell.execute_reply": "2024-12-04T14:04:27.594779Z",
     "shell.execute_reply.started": "2024-12-04T14:01:46.318500Z"
    },
    "papermill": {
     "duration": 514.92352,
     "end_time": "2024-11-23T14:44:02.805529",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.882009",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f30a6e40c89040769745b56f01b07c60",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df = predict(md_vector_db, ollama_llm, prediction_df, n_chunks=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "f7026bac-9927-4a33-85c0-bc1b35f3a603",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:04:27.596156Z",
     "iopub.status.busy": "2024-12-04T14:04:27.595979Z",
     "iopub.status.idle": "2024-12-04T14:04:27.771576Z",
     "shell.execute_reply": "2024-12-04T14:04:27.771005Z",
     "shell.execute_reply.started": "2024-12-04T14:04:27.596139Z"
    }
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'predictions.xlsx')\n",
    "pred_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d79e974-089f-4c08-ba5e-804f6542e06a",
   "metadata": {
    "papermill": {
     "duration": 0.14423,
     "end_time": "2024-11-23T14:44:03.513124",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.368894",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "217568fe-c0e4-49eb-9a7c-9fdfbc033d8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:04:27.772418Z",
     "iopub.status.busy": "2024-12-04T14:04:27.772254Z",
     "iopub.status.idle": "2024-12-04T14:04:27.981679Z",
     "shell.execute_reply": "2024-12-04T14:04:27.981232Z",
     "shell.execute_reply.started": "2024-12-04T14:04:27.772402Z"
    },
    "papermill": {
     "duration": 0.369729,
     "end_time": "2024-11-23T14:44:04.017198",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.647469",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "import time\n",
    "\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "def evaluate(prediction_df):\n",
    "    \"\"\"\n",
    "    对预测结果进行打分\n",
    "    :param prediction_df: 预测结果，需要包含问题，参考答案，生成的答案，列名分别为question, ref_answer, gen_answer\n",
    "    :return 打分模型原始返回结果\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        question = row['question']\n",
    "        ref_answer = row['ref_answer']\n",
    "        gen_answer = row['gen_answer']\n",
    "\n",
    "        prompt = prompt_tmpl.replace('{{question}}', question).replace('{{ref_answer}}', str(ref_answer)).replace('{{gen_answer}}', gen_answer).strip()\n",
    "        \n",
    "        retry_count = 3\n",
    "        result = ''\n",
    "        \n",
    "        while retry_count > 0:\n",
    "            try:\n",
    "                result = judge_llm.invoke(prompt).content\n",
    "                break\n",
    "            except Exception as e:\n",
    "                retry_count -= 1\n",
    "                sleeping_seconds = 2 ** (4 - retry_count)\n",
    "                print(f\"query={question}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "                \n",
    "                time.sleep(sleeping_seconds)\n",
    "        \n",
    "        results.append(result)\n",
    "\n",
    "        time.sleep(1)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "71db81af-b8f9-47ba-958b-761896516605",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:04:27.982555Z",
     "iopub.status.busy": "2024-12-04T14:04:27.982250Z",
     "iopub.status.idle": "2024-12-04T14:07:04.667061Z",
     "shell.execute_reply": "2024-12-04T14:07:04.664642Z",
     "shell.execute_reply.started": "2024-12-04T14:04:27.982542Z"
    },
    "papermill": {
     "duration": 150.566109,
     "end_time": "2024-11-23T14:46:34.714324",
     "exception": false,
     "start_time": "2024-11-23T14:44:04.148215",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dc5d1365fa874336a19a0f4fbfebcdd2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df['raw_score'] = evaluate(pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "7da1b98e-99aa-4e11-9297-91eac1c62493",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.670667Z",
     "iopub.status.busy": "2024-12-04T14:07:04.669899Z",
     "iopub.status.idle": "2024-12-04T14:07:04.684790Z",
     "shell.execute_reply": "2024-12-04T14:07:04.682490Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.670595Z"
    },
    "papermill": {
     "duration": 0.138037,
     "end_time": "2024-11-23T14:46:35.040595",
     "exception": false,
     "start_time": "2024-11-23T14:46:34.902558",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['是', '否',\n",
       "       '否\\n\\n（注：虽然纽约联储全球供应链压力指数下降可能表明供应链状况改善，但这并不一定意味着供应链已经完全恢复到正常水平或历史最低压力状态。因此，“持续恢复”和“降至有记录以来的最低值”这两个表述并不等价。） \\n\\n但是，根据题目要求，我只应该回答“是”或“否”，没有解释。所以，我的回答是“否”。'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_df['raw_score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "2c99c078-d294-40b8-b57b-31cfd7349c3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.687497Z",
     "iopub.status.busy": "2024-12-04T14:07:04.686957Z",
     "iopub.status.idle": "2024-12-04T14:07:04.697191Z",
     "shell.execute_reply": "2024-12-04T14:07:04.696254Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.687445Z"
    },
    "papermill": {
     "duration": 0.107466,
     "end_time": "2024-11-23T14:46:35.243603",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.136137",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "pred_df['score'] = (pred_df['raw_score'] == '是').astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "423897f2-786e-415b-a613-55a4359faf76",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.698513Z",
     "iopub.status.busy": "2024-12-04T14:07:04.698135Z",
     "iopub.status.idle": "2024-12-04T14:07:04.705075Z",
     "shell.execute_reply": "2024-12-04T14:07:04.704333Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.698483Z"
    },
    "papermill": {
     "duration": 0.094328,
     "end_time": "2024-11-23T14:46:35.431162",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.336834",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.68"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of rows scored 1, i.e. the mean of the score column.\n",
    "pred_df['score'].agg('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "79325429-9cf1-4e2c-95ac-cb0c1a3b6156",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.706080Z",
     "iopub.status.busy": "2024-12-04T14:07:04.705871Z",
     "iopub.status.idle": "2024-12-04T14:07:04.834366Z",
     "shell.execute_reply": "2024-12-04T14:07:04.833763Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.706060Z"
    },
    "papermill": {
     "duration": 0.289336,
     "end_time": "2024-11-23T14:46:35.804651",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.515315",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Save pred_df as eval_df.xlsx inside expr_dir; index=False leaves the row index out of the sheet.\n",
    "save_path = os.path.join(expr_dir, 'eval_df.xlsx')\n",
    "pred_df.to_excel(excel_writer=save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9264087-cea4-4131-98eb-7875b0cbaddf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T08:01:27.295186Z",
     "iopub.status.busy": "2024-12-04T08:01:27.294401Z",
     "iopub.status.idle": "2024-12-04T08:01:27.302698Z",
     "shell.execute_reply": "2024-12-04T08:01:27.301299Z",
     "shell.execute_reply.started": "2024-12-04T08:01:27.295113Z"
    }
   },
   "source": [
    "# 结果分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "88fc7227-9c21-48da-b179-5070406eb113",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.835268Z",
     "iopub.status.busy": "2024-12-04T14:07:04.835073Z",
     "iopub.status.idle": "2024-12-04T14:07:04.840712Z",
     "shell.execute_reply": "2024-12-04T14:07:04.839966Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.835249Z"
    },
    "papermill": {
     "duration": 0.088622,
     "end_time": "2024-11-23T14:46:36.016801",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.928179",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Mean and maximum len(page_content) over each row's chunks.\n",
    "# Rows whose chunk list is empty get NaN instead of raising ZeroDivisionError / ValueError.\n",
    "pred_df['avg_chunk_len'] = pred_df['chunks'].apply(\n",
    "    lambda chunks: sum(len(d.page_content) for d in chunks) / len(chunks) if len(chunks) else float('nan')\n",
    ")\n",
    "pred_df['max_chunk_len'] = pred_df['chunks'].apply(\n",
    "    lambda chunks: max((len(d.page_content) for d in chunks), default=float('nan'))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "fb1a3e72-cb28-419a-8f6c-a0ef5d34c67a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.845069Z",
     "iopub.status.busy": "2024-12-04T14:07:04.844827Z",
     "iopub.status.idle": "2024-12-04T14:07:04.848860Z",
     "shell.execute_reply": "2024-12-04T14:07:04.848013Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.845045Z"
    }
   },
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "ff2413c8-f7a7-4e70-9178-c42ff2427426",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.850082Z",
     "iopub.status.busy": "2024-12-04T14:07:04.849804Z",
     "iopub.status.idle": "2024-12-04T14:07:04.943706Z",
     "shell.execute_reply": "2024-12-04T14:07:04.943255Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.850056Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='avg_chunk_len'>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvuklEQVR4nO3df1iUdb7/8deAMAPIYGAwmoimuxmJaWo61XpKDUSOlbFtu5ha68kTi2bSusaulWlJ61ZqZa61rnY22fbU5taapEiKKZiuhZq2blIedHWgoynKd2cUmO8fHedqUouGgXu8eT6ua65r7s/9ue/7/ZELeV2f+5fF6/V6BQAAYFJhRhcAAADQmgg7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1DoYXUAoaGpq0uHDhxUbGyuLxWJ0OQAAoBm8Xq9Onjyprl27KizswvM3hB1Jhw8fVnJystFlAACAABw8eFDdunW74HrCjqTY2FhJX/5j2e12g6sBAADNUVdXp+TkZN/f8Qsh7Ei+U1d2u52wAwDARebbLkHhAmUAAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqvAgUzeL1euV2u40uo8W8Xq88Ho8kyWq1fuvL40KdzWa76McAAK2NsINmcbvdyszMNLoMfE1xcbGioqKMLgMAQhqnsQAAgKkxs4NmsdlsKi4uNrqMFnO73Ro7dqwkadWqVbLZbAZX1DIXe/0A0BYIO2gWi8ViutMlNpvNdGMCAJyL01gAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUQirsPPnkk7JYLHrggQd8bTfeeKMsFovf57777vPbrrq6WllZWYqOjlZiYqJmzJihhoaGNq4eAACEog5GF3DW9u3btXTpUvXr1++cdffee6/mzJnjW46OjvZ9b2xsVFZWlhwOh8rLy3XkyBFNmDBBERERmjdvXpvUDgAAQldIhJ1Tp05p3Lhxeumll/T444+fsz46OloOh+O8265bt0579+7V+vXrlZSUpP79+2vu3LmaOXOmZs+ercjIyNYuHwAual6vV2632+gyWszr9crj8UiSrFarLBaLwRW1jM1mu+jHECpCIuzk5eUpKytLI0eOPG/YWblypV555RU5HA6NGTNGDz/8sG92p6KiQmlpaUpKSvL1z8jIUG5urvbs2aMBAwacsz+Px+P7hZCkurq6VhgVAFwc3G63MjMzjS4DX1NcXKyoqCijyzAFw8POq6++qg8++EDbt28/7/qcnBylpKSoa9eu2rVrl2bOnKl9+/bpjTfekCS5XC6/oCPJt+xyuc67z8LCQj322GNBHAUAAAhVhoadgwcPatq0aSopKZHNZjtvn8mTJ/u+p6WlqUuXLhoxYoSqqqrUq1evgI5bUFCg/Px833JdXZ2Sk5MD2hcAXOxsNpuKi4uNLqPF3G63xo4dK0latWrVBf+uXCwu9vpDiaFhZ8eOHaqtrdU111zja2tsbNSmTZv0/PPPy+PxKDw83G+bIUOGSJL279+vXr16yeFwaNu2bX59ampqJOmC1/lYrVZZrdZgDgUALloWi8V0p0tsNpvpxoTAGXrr+YgRI7R7925VVlb6PoMGDdK4ceNUWVl5TtCRpMrKSklSly5dJElOp1O7d+9WbW2tr09JSYnsdrtSU1PbZBwAACB0GTqzExsbq759+/q1xcTEKCEhQX379lVVVZWKioo0evRoJSQkaNeuXZo+fbqGDRvmu0U9PT1dqampGj9+vObPny+Xy6VZs2YpLy+P2RsAAGD8BcrfJDIyUuvXr9fChQtVX1+v
5ORkZWdna9asWb4+4eHhWr16tXJzc+V0OhUTE6OJEyf6PZcHAAC0XyEXdjZu3Oj7npycrLKysm/dJiUlRWvWrGnFqgAAwMUqpF4XAQAAEGyEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGohFXaefPJJWSwWPfDAA742t9utvLw8JSQkqGPHjsrOzlZNTY3fdtXV1crKylJ0dLQSExM1Y8YMNTQ0tHH1AAAgFIVM2Nm+fbuWLl2qfv36+bVPnz5df/3rX/Xaa6+prKxMhw8f1u233+5b39jYqKysLJ0+fVrl5eV6+eWXtWLFCj3yyCNtPQQAABCCQiLsnDp1SuPGjdNLL72kSy65xNd+4sQJLVu2TM8884yGDx+ugQMHavny5SovL9fWrVslSevWrdPevXv1yiuvqH///srMzNTcuXO1ePFinT59+rzH83g8qqur8/sAAABzComwk5eXp6ysLI0cOdKvfceOHTpz5oxfe58+fdS9e3dVVFRIkioqKpSWlqakpCRfn4yMDNXV1WnPnj3nPV5hYaHi4uJ8n+Tk5FYYFQAACAWGh51XX31VH3zwgQoLC89Z53K5FBkZqU6dOvm1JyUlyeVy+fp8NeicXX923fkUFBToxIkTvs/BgweDMBIAABCKOhh58IMHD2ratGkqKSmRzWZrs+NarVZZrdY2Ox4AADCOoTM7O3bsUG1tra655hp16NBBHTp0UFlZmZ599ll16NBBSUlJOn36tI4fP+63XU1NjRwOhyTJ4XCcc3fW2eWzfQAAQPtlaNgZMWKEdu/ercrKSt9n0KBBGjdunO97RESESktLfdvs27dP1dXVcjqdkiSn06ndu3ertrbW16ekpER2u12pqaltPiYAABBaDD2NFRsbq759+/q1xcTEKCEhwdc+adIk5efnKz4+Xna7XVOnTpXT6dTQoUMlSenp6UpNTdX48eM1f/58uVwuzZo1S3l5eZyqAgAAxoad5liwYIHCwsKUnZ0tj8ejjIwMvfDCC7714eHhWr16tXJzc+V0OhUTE6OJEydqzpw5BlYNAABCRciFnY0bN/ot22w2LV68WIsXL77gNikpKVqzZk0rVwYAAC5Ght96DgAA0JoIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNRC7kWgAHAx8Xq9crvdRpcBye/nwM8kdNhsNlksFkNrIOwAQAu43W5lZmYaXQa+ZuzYsUaXgP9TXFysqKgoQ2vgNBYAADA1ZnYAIEiev+GYrOFeo8tot7xe6XTTl98jwySDz5y0a55Gi6Zsjje6DB/CDgAEiTXcK2u40VW0bzajC8D/Ca3QT9hpA1zAGDq4gDE0hcIFjADMi7DTBriAMTRxAWPoCIULGAGYFxcoAwAAU2Nmp42d6v8TecP4ZzeM1ys1NXz5PawDVzAayNLUoI6VfzS6DADtAH9125g3rIMUHmF0Ge1cpNEFQKF2+SIAM+M0FgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDVDw86SJUvUr18/2e122e12OZ1OFRcX+9bfeOONslgsfp/77rvPbx/V1dXKyspSdHS0EhMTNWPGDDU0NLT1UAAAQIjqYOTBu3XrpieffFLf+9735PV6
9fLLL+vWW2/Vhx9+qKuuukqSdO+992rOnDm+baKjo33fGxsblZWVJYfDofLych05ckQTJkxQRESE5s2b1+bjAQAAocfQsDNmzBi/5SeeeEJLlizR1q1bfWEnOjpaDofjvNuvW7dOe/fu1fr165WUlKT+/ftr7ty5mjlzpmbPnq3IyMhWHwMAAAhthoadr2psbNRrr72m+vp6OZ1OX/vKlSv1yiuvyOFwaMyYMXr44Yd9szsVFRVKS0tTUlKSr39GRoZyc3O1Z88eDRgw4LzH8ng88ng8vuW6urpWGhWA9sTTaHQFQGgItd+FgMJOY2OjVqxYodLSUtXW1qqpqclv/bvvvtvsfe3evVtOp1Nut1sdO3bUqlWrlJqaKknKyclRSkqKunbtql27dmnmzJnat2+f3njjDUmSy+XyCzqSfMsul+uCxywsLNRjjz3W7BoBoDmmbE4wugQA5xFQ2Jk2bZpWrFihrKws9e3bVxaLJeACrrjiClVWVurEiRN6/fXXNXHiRJWVlSk1NVWTJ0/29UtLS1OXLl00YsQIVVVVqVevXgEfs6CgQPn5+b7luro6JScnB7w/AAAQugIKO6+++qr++7//W6NHj25xAZGRkerdu7ckaeDAgdq+fbsWLVqkpUuXntN3yJAhkqT9+/erV69ecjgc2rZtm1+fmpoaSbrgdT6SZLVaZbVaW1w7AHzV8zcclTXc6CoA43kaQ2umM6Cw89WAEmxNTU1+19N8VWVlpSSpS5cukiSn06knnnhCtbW1SkxMlCSVlJTIbrf7ToUBQFuxhouwA4SggJ6z8+CDD2rRokXyer0tOnhBQYE2bdqkAwcOaPfu3SooKNDGjRs1btw4VVVVae7cudqxY4cOHDigt956SxMmTNCwYcPUr18/SVJ6erpSU1M1fvx47dy5U2vXrtWsWbOUl5fHzA0AAJAU4MzO5s2btWHDBhUXF+uqq65SRESE3/qzFxB/m9raWk2YMEFHjhxRXFyc+vXrp7Vr1+rmm2/WwYMHtX79ei1cuFD19fVKTk5Wdna2Zs2a5ds+PDxcq1evVm5urpxOp2JiYjRx4kS/5/KEnMYzRlcAhAZ+FwC0kYDCTqdOnTR27NgWH3zZsmUXXJecnKyysrJv3UdKSorWrFnT4lraSuzOV40uAQCAdiWgsLN8+fJg1wEAANAqAn6oYENDgzZu3Kiqqirl5OQoNjZWhw8flt1uV8eOHYNZo6mcvPrHUnjEt3cEzK7xDDOdANpEQGHnf/7nfzRq1ChVV1fL4/Ho5ptvVmxsrH7961/L4/Hot7/9bbDrNI/wCMIOAABtKKC7saZNm6ZBgwbpiy++UFRUlK997NixKi0tDVpxAAAALRXQzM57772n8vLyc1602aNHD/3zn/8MSmEAAADBENDMTlNTkxobz33L16FDhxQbG9viogAAAIIloLCTnp6uhQsX+pYtFotOnTqlRx99NCivkAAAAAiWgE5jPf3008rIyFBqaqrcbrdycnL0ySefqHPnzvrjH/8Y7BoBAAACFlDY6datm3bu3KlXX31Vu3bt0qlTpzRp0iSNGzfO74JlAAAAowX8nJ0OHTrorrvuCmYtAAAAQdfssPPWW281e6e33HJLQMUAAAAEW7PDzm233dasfhaL5bx3agEAABih2WGnqampNesAAABoFQHdet5caWlpOnjwYGseAgAA4Bu1atg5cOCAzpw505qHAAAA+EatGnYAAACMRtgBAACmRtgBAACmRtgBAACmRtgBAACmFlDYOXTo0AXXbd261fd96dKlSkpKCuQQAAAAQRFQ2ElPT9exY8fOad+yZYtGjRrlW87JyVFMTEzg1QEAALRQQGFn6NChSk9P18mTJ31tmzZt0ujRo/Xoo48GrTgAAICWCijs/O53v1P37t01ZswYeTwebdiwQVlZWZozZ46mT58e7BoBAAACFlDYCQsL06uvvqqIiAgNHz5ct9xyiwoLCzVt2rRg1wcAANAizX4R
6K5du85pmz17tn7yk5/orrvu0rBhw3x9+vXrF7wKAQAAWqDZYad///6yWCzyer2+trPLS5cu1Ysvviiv1yuLxaLGxsZWKRYAAOC7anbY+eyzz1qzDgAAgFbR7LCTkpLSmnUAAAC0imaHna/75JNPtGHDBtXW1qqpqclv3SOPPNLiwgAAAIIhoLDz0ksvKTc3V507d5bD4ZDFYvGts1gshB0AABAyAgo7jz/+uJ544gnNnDkz2PUAAAAEVUDP2fniiy90xx13BLsWAACAoAso7Nxxxx1at25dsGsBAAAIuoBOY/Xu3VsPP/ywtm7dqrS0NEVERPitv//++4NSHAAAQEsFFHZefPFFdezYUWVlZSorK/NbZ7FYCDsAACBkBBR2eMAgAAC4WAR0zU6wLFmyRP369ZPdbpfdbpfT6VRxcbFvvdvtVl5enhISEtSxY0dlZ2erpqbGbx/V1dXKyspSdHS0EhMTNWPGDDU0NLT1UAAAQIgKaGbnpz/96Teu//3vf9+s/XTr1k1PPvmkvve978nr9erll1/Wrbfeqg8//FBXXXWVpk+frrfffluvvfaa4uLiNGXKFN1+++3asmWLJKmxsVFZWVlyOBwqLy/XkSNHNGHCBEVERGjevHmBDA0AAJhMQGHniy++8Fs+c+aMPvroIx0/flzDhw9v9n7GjBnjt/zEE09oyZIl2rp1q7p166Zly5apqKjIt8/ly5fryiuv1NatWzV06FCtW7dOe/fu1fr165WUlKT+/ftr7ty5mjlzpmbPnq3IyMjzHtfj8cjj8fiW6+rqml1zS1maGuT99m5oLV6v1PR/M39hHaSvPBATbcvSxAwsgLYRUNhZtWrVOW1NTU3Kzc1Vr169AiqksbFRr732murr6+V0OrVjxw6dOXNGI0eO9PXp06ePunfvroqKCg0dOlQVFRVKS0tTUlKSr09GRoZyc3O1Z88eDRgw4LzHKiws1GOPPRZQnS3VsfKPhhwXAID2KmjX7ISFhSk/P18LFiz4Ttvt3r1bHTt2lNVq1X333adVq1YpNTVVLpdLkZGR6tSpk1//pKQkuVwuSZLL5fILOmfXn113IQUFBTpx4oTvc/Dgwe9UMwAAuHgE/CLQ86mqqvrOFwdfccUVqqys1IkTJ/T6669r4sSJ59zOHmxWq1VWq7VVj/FVNpvN78JrGMftdmvs2LGSvpyhtNlsBlcESfwcALSqgMJOfn6+37LX69WRI0f09ttva+LEid9pX5GRkerdu7ckaeDAgdq+fbsWLVqkO++8U6dPn9bx48f9ZndqamrkcDgkSQ6HQ9u2bfPb39m7tc72CQUWi0VRUVFGl4Gvsdls/FwAoB0IKOx8+OGHfsthYWG69NJL9fTTT3/rnVrfpqmpSR6PRwMHDlRERIRKS0uVnZ0tSdq3b5+qq6vldDolSU6nU0888YRqa2uVmJgoSSopKZHdbldqamqL6gAAAOYQUNjZsGFDUA5eUFCgzMxMde/eXSdPnlRRUZE2btyotWvXKi4uTpMmTVJ+fr7i4+Nlt9s1depUOZ1ODR06VJKUnp6u1NRUjR8/XvPnz5fL5dKsWbOUl5fXpqepAABA6ArqNTvfVW1trSZMmKAjR44oLi5O/fr109q1a3XzzTdLkhYsWKCwsDBlZ2fL4/EoIyNDL7zwgm/78PBwrV69Wrm5uXI6nYqJidHEiRM1Z84co4YEAABCTEBhp6amRj//+c9VWlqq2tpaeb3+T45pbGxs1n6WLVv2jettNpsWL16sxYsXX7BPSkqK1qxZ06zjAQCA9iegsHP33XerurpaDz/8sLp06SILD2YDAAAhKqCws3nzZr333nvq379/kMsBAAAIroAeKpicnHzOqSsAAIBQFFDYWbhwoR566CEdOHAgyOUAAAAEV7NPY11yySV+1+bU19erV69eio6OVkREhF/fY8eOBa9CAACAFmh22Fm4cGErlgEAANA6mh12vutrIAAAAEJBQNfsrFmzRmvX
rj2nfd26dbzwEgAAhJSAws5DDz103gcHNjU16aGHHmpxUQAAAMES0HN2Pvnkk/O+aLNPnz7av39/i4sCgIuRp9EiicdyGMXrlU43ffk9MkziebfG+fJ3IXQEFHbi4uL06aefqkePHn7t+/fvV0xMTDDqAoCLzpTN8UaXAOA8AjqNdeutt+qBBx5QVVWVr23//v168MEHdcsttwStOAAAgJYKaGZn/vz5GjVqlPr06aNu3bpJkg4dOqQf/OAHeuqpp4JaIACEMpvNxo0ZIcLtdmvs2LGSpFWrVslmsxlcESSFxM8h4NNY5eXlKikp0c6dOxUVFaV+/fpp2LBhwa4PAEKaxWJRVFSU0WXga2w2Gz8X+AQUdqQvf8HT09OVnp5+wT5paWlas2aNkpOTAz0MAABAiwR0zU5zHThwQGfOnGnNQwAAAHyjVg07AAAARiPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAU2vVsLN06VIlJSW15iEAAAC+UUDP2Xn22WfP226xWGSz2dS7d28NGzZMOTk5LSoOAACgpQIKOwsWLNDnn3+u//f//p8uueQSSdIXX3yh6OhodezYUbW1tbr88su1YcMGHigIAAAMFdBprHnz5mnw4MH65JNPdPToUR09elT/+Mc/NGTIEC1atEjV1dVyOByaPn16sOsFAAD4TgKa2Zk1a5b+/Oc/q1evXr623r1766mnnlJ2drY+/fRTzZ8/X9nZ2UErFAAAIBABzewcOXJEDQ0N57Q3NDTI5XJJkrp27aqTJ0+2rDoAAIAWCijs3HTTTfrP//xPffjhh762Dz/8ULm5uRo+fLgkaffu3erZs2dwqgQAAAhQQGFn2bJlio+P18CBA2W1WmW1WjVo0CDFx8dr2bJlkqSOHTvq6aefDmqxAAAA31VA1+w4HA6VlJTo73//u/7xj39Ikq644gpdccUVvj433XRTcCoEAABogYDCzubNm3XDDTeoT58+6tOnT7BrAgAACJqATmMNHz5cPXv21C9/+Uvt3bs32DUBAAAETUBh5/Dhw3rwwQdVVlamvn37qn///vrNb36jQ4cOBbs+AACAFgko7HTu3FlTpkzRli1bVFVVpTvuuEMvv/yyevTo4bsbCwAAIBS0+EWgPXv21EMPPaQnn3xSaWlpKisrC0ZdAAAAQdGisLNlyxb97Gc/U5cuXZSTk6O+ffvq7bffDlZtAAAALRZQ2CkoKFDPnj110003qbq6WosWLZLL5dIf/vAHjRo1qtn7KSws1ODBgxUbG6vExETddttt2rdvn1+fG2+8URaLxe9z3333+fWprq5WVlaWoqOjlZiYqBkzZpz3Cc8AAKD9CejW802bNmnGjBn60Y9+pM6dOwd88LKyMuXl5Wnw4MFqaGjQL3/5S6Wnp2vv3r2KiYnx9bv33ns1Z84c33J0dLTve2Njo7KysuRwOFReXq4jR45owoQJioiI0Lx58wKuDQAAmENAYWfLli2SpL179+pvf/ubTp8+7bf+lltuadZ+3nnnHb/lFStWKDExUTt27NCwYcN87dHR0XI4HOfdx7p167R3716tX79eSUlJ6t+/v+bOnauZM2dq9uzZioyM/C5DAwAAJhNQ2Pnss880duxY7dq1SxaLRV6vV5JksVgkfTnbEogTJ05IkuLj4/3aV65cqVdeeUUOh0NjxozRww8/7JvdqaioUFpampKSknz9MzIylJubqz179mjAgAHnHMfj8cjj8fiW6+rqAqoXAACEvoCu2bn//vvVo0cP1dbWKjo6Wnv27NGmTZs0aNAgbdy4MaBCmpqa9MADD+j6669X3759fe05OTl65ZVXtGHDBhUUFOgPf/iD7rrrLt96l8vlF3Qk+ZbPvoH96woLCxUXF+f7JCcnB1QzAAAIfQHN7FRUVOjdd99V586dFRYWprCwMN1www0qLCzU/fff7/c29ObKy8vTRx99pM2bN/u1T5482fc9LS1NXbp00YgRI1RVVaVevXoFUr4K
CgqUn5/vW66rqyPwAABgUgHN7DQ2Nio2NlbSlw8YPHz4sCQpJSXlnLupmmPKlClavXq1NmzYoG7dun1j3yFDhkiS9u/fL+nLl5LW1NT49Tm7fKHrfKxWq+x2u98HAACYU0Bhp2/fvtq5c6ekL8PH/PnztWXLFs2ZM0eXX355s/fj9Xo1ZcoUrVq1Su+++6569uz5rdtUVlZKkrp06SJJcjqd2r17t2pra319SkpKZLfblZqa+h1GBQAAzCig01izZs1SfX29JGnOnDn693//d/3gBz9QQkKC/vSnPzV7P3l5eSoqKtKbb76p2NhY3zU2cXFxioqKUlVVlYqKijR69GglJCRo165dmj59uoYNG6Z+/fpJktLT05Wamqrx48dr/vz5crlcmjVrlvLy8mS1WgMZHgAAMJGAwk5GRobve+/evfX3v/9dx44d0yWXXOK7I6s5lixZIunLBwd+1fLly3X33XcrMjJS69ev18KFC1VfX6/k5GRlZ2dr1qxZvr7h4eFavXq1cnNz5XQ6FRMTo4kTJ/o9lwcAALRfAYWd8/n67eLNcfaW9QtJTk5u1ru2UlJStGbNmu98fAAAYH4tfhEoAABAKCPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUzM07BQWFmrw4MGKjY1VYmKibrvtNu3bt8+vj9vtVl5enhISEtSxY0dlZ2erpqbGr091dbWysrIUHR2txMREzZgxQw0NDW05FAAAEKIMDTtlZWXKy8vT1q1bVVJSojNnzig9PV319fW+PtOnT9df//pXvfbaayorK9Phw4d1++23+9Y3NjYqKytLp0+fVnl5uV5++WWtWLFCjzzyiBFDAgAAIaaDkQd/5513/JZXrFihxMRE7dixQ8OGDdOJEye0bNkyFRUVafjw4ZKk5cuX68orr9TWrVs1dOhQrVu3Tnv37tX69euVlJSk/v37a+7cuZo5c6Zmz56tyMhII4ZmOl6vV2632+gyWuyrYzDDeGw2mywWi9FlAEBIMzTsfN2JEyckSfHx8ZKkHTt26MyZMxo5cqSvT58+fdS9e3dVVFRo6NChqqioUFpampKSknx9MjIylJubqz179mjAgAHnHMfj8cjj8fiW6+rqWmtIpuF2u5WZmWl0GUE1duxYo0toseLiYkVFRRldBgCEtJC5QLmpqUkPPPCArr/+evXt21eS5HK5FBkZqU6dOvn1TUpKksvl8vX5atA5u/7suvMpLCxUXFyc75OcnBzk0QAAgFARMjM7eXl5+uijj7R58+ZWP1ZBQYHy8/N9y3V1dQSeb2Gz2VRcXGx0GS3m9Xp9s3pWq/WiPwVks9mMLgEAQl5IhJ0pU6Zo9erV2rRpk7p16+ZrdzgcOn36tI4fP+43u1NTUyOHw+Hrs23bNr/9nb1b62yfr7NarbJarUEehblZLBbTnC6Jjo42ugQAQBsy9DSW1+vVlClTtGrVKr377rvq2bOn3/qBAwcqIiJCpaWlvrZ9+/apurpaTqdTkuR0OrV7927V1tb6+pSUlMhutys1NbVtBgIAAEKWoTM7eXl5Kioq0ptvvqnY2FjfNTZxcXGKiopSXFycJk2apPz8fMXHx8tut2vq1KlyOp0aOnSoJCk9PV2pqakaP3685s+fL5fLpVmzZikvL4/ZGwAAYGzYWbJkiSTpxhtv9Gtfvny57r77bknSggULFBYWpuzsbHk8HmVkZOiFF17w9Q0PD9fq1auVm5srp9OpmJgYTZw4
UXPmzGmrYQAAgBBm8Xq9XqOLMFpdXZ3i4uJ04sQJ2e12o8sBAATgX//6l+8RGTyWoX1o7t/vkLn1HAAAoDUQdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdtDulJeX684771R5ebnRpQAA2gBhB+2K2+3WM888o5qaGj3zzDNyu91GlwQAaGWEHbQrK1eu1NGjRyVJR48eVVFRkcEVAQBaG2EH7cahQ4dUVFQkr9crSfJ6vSoqKtKhQ4cMrgwA0JoIO2gXvF6vFi1adMH2swEIAGA+hB20C9XV1dq+fbsaGxv92hsbG7V9+3ZVV1cbVBkAoLURdtAudO/eXYMHD1Z4eLhfe3h4uK699lp1797doMoAAK2NsIN2wWKxaNq0aRdst1gsBlQFAGgLhB20G926dVNOTo4v2FgsFuXk5Oiyyy4zuDIAQGsi7KBdGTdunBISEiRJnTt3Vk5OjsEVAQBaG2EH7YrNZlN+fr6SkpI0ffp02Ww2o0sCALSyDkYXALS16667Ttddd53RZQAA2ggzOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQMDTubNm3SmDFj1LVrV1ksFv3lL3/xW3/33XfLYrH4fUaNGuXX59ixYxo3bpzsdrs6deqkSZMm6dSpU204CgAAEMoMDTv19fW6+uqrtXjx4gv2GTVqlI4cOeL7/PGPf/RbP27cOO3Zs0clJSVavXq1Nm3apMmTJ7d26QAA4CJh6LuxMjMzlZmZ+Y19rFarHA7Hedd9/PHHeuedd7R9+3YNGjRIkvTcc89p9OjReuqpp9S1a9fzbufxeOTxeHzLdXV1AY4AAACEupC/Zmfjxo1KTEzUFVdcodzcXB09etS3rqKiQp06dfIFHUkaOXKkwsLC9P77719wn4WFhYqLi/N9kpOTW3UMAADAOCEddkaNGqX/+q//UmlpqX7961+rrKxMmZmZamxslCS5XC4lJib6bdOhQwfFx8fL5XJdcL8FBQU6ceKE73Pw4MFWHQcAADCOoaexvs2Pf/xj3/e0tDT169dPvXr10saNGzVixIiA92u1WmW1WoNRIgAACHEhPbPzdZdffrk6d+6s/fv3S5IcDodqa2v9+jQ0NOjYsWMXvM4HAAC0LxdV2Dl06JCOHj2qLl26SJKcTqeOHz+uHTt2+Pq8++67ampq0pAhQ4wqEwAAhBBDT2OdOnXKN0sjSZ999pkqKysVHx+v+Ph4PfbYY8rOzpbD4VBVVZV+8YtfqHfv3srIyJAkXXnllRo1apTuvfde/fa3v9WZM2c0ZcoU/fjHP77gnVgAAKB9MXRm529/+5sGDBigAQMGSJLy8/M1YMAAPfLIIwoPD9euXbt0yy236Pvf/74mTZqkgQMH6r333vO73mblypXq06ePRowYodGjR+uGG27Qiy++aNSQAABAiDF0ZufGG2+U1+u94Pq1a9d+6z7i4+NVVFQUzLIAAICJXFTX7AAAAHxXIX3rOQCg9Xm9XrndbqPLaLGvjsEM47HZbLJYLEaXYQqEHQBo59xu97e+uudiM3bsWKNLaLHi4mJFRUUZXYYpcBoLAACYGjM7ANDO2Ww2FRcXG11Gi3m9Xt9Lnq1W60V/CshmsxldgmkQdgCgnbNYLKY5XRIdHW10CQhBnMYCAACmRtgBAACmRtgBAACmRtgBAACmRtgBAACmRtgBAACmRtgBAACmRtgBAJhGeXm57rzzTpWXlxtdCkIIYQcAYAput1vPPPOMampq9Mwzz5jiZaAIDsIOAMAUVq5cqaNHj0qSjh49qqKiIoMrQqgg7AAALnqHDh1SUVGRvF6vpC/fk1VUVKRDhw4ZXBlCAWEHAHBR83q9WrRo0QXbzwYgtF+EHQDARa26ulrb
t29XY2OjX3tjY6O2b9+u6upqgypDqCDsAAAuat27d9fgwYMVHh7u1x4eHq5rr71W3bt3N6gyhArCDgDgomaxWDRt2rQLtlssFgOqQigh7AAALnrdunVTTk6OL9hYLBbl5OTosssuM7gyhALCDgDAFMaNG6eEhARJUufOnZWTk2NwRQgVhB0AgCnYbDbl5+crKSlJ06dPl81mM7okhIgORhcAAECwXHfddbruuuuMLgMhhpkdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgajxBWZLX65Uk1dXVGVwJAABorrN/t8/+Hb8Qwo6kkydPSpKSk5MNrgQAAHxXJ0+eVFxc3AXXW7zfFofagaamJh0+fFixsbGyWCxGl4NWVldXp+TkZB08eFB2u93ocgAEEb/f7YvX69XJkyfVtWtXhYVd+MocZnYkhYWFqVu3bkaXgTZmt9v5zxAwKX6/249vmtE5iwuUAQCAqRF2AACAqRF20O5YrVY9+uijslqtRpcCIMj4/cb5cIEyAAAwNWZ2AACAqRF2AACAqRF2AACAqRF2AACAqRF20K4sXrxYPXr0kM1m05AhQ7Rt2zajSwIQBJs2bdKYMWPUtWtXWSwW/eUvfzG6JIQQwg7ajT/96U/Kz8/Xo48+qg8++EBXX321MjIyVFtba3RpAFqovr5eV199tRYvXmx0KQhB3HqOdmPIkCEaPHiwnn/+eUlfvhMtOTlZU6dO1UMPPWRwdQCCxWKxaNWqVbrtttuMLgUhgpkdtAunT5/Wjh07NHLkSF9bWFiYRo4cqYqKCgMrAwC0NsIO2oX//d//VWNjo5KSkvzak5KS5HK5DKoKANAWCDsAAMDUCDtoFzp37qzw8HDV1NT4tdfU1MjhcBhUFQCgLRB20C5ERkZq4MCBKi0t9bU1NTWptLRUTqfTwMoAAK2tg9EFAG0lPz9fEydO1KBBg3Tttddq4cKFqq+v1z333GN0aQBa6NSpU9q/f79v+bPPPlNlZaXi4+PVvXt3AytDKODWc7Qrzz//vH7zm9/I5XKpf//+evbZZzVkyBCjywLQQhs3btRNN910TvvEiRO1YsWKti8IIYWwAwAATI1rdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgBA0unTp40uAUArIewACGmvv/660tLSFBUVpYSEBI0cOVL19fWSpN///ve66qqrZLVa1aVLF02ZMsW3XXV1tW699VZ17NhRdrtdP/rRj1RTU+NbP3v2bPXv31+/+93v1LNnT9lsNknS8ePH9R//8R+69NJLZbfbNXz4cO3cubNtBw0gqAg7AELWkSNH9JOf/EQ//elP9fHHH2vjxo26/fbb5fV6tWTJEuXl5Wny5MnavXu33nrrLfXu3VuS1NTUpFtvvVXHjh1TWVmZSkpK9Omnn+rOO+/02//+/fv15z//WW+88YYqKyslSXfccYdqa2tVXFysHTt26JprrtGIESN07Nixth4+gCDhrecAQtYHH3yggQMH6sCBA0pJSfFbd9lll+mee+7R448/fs52JSUlyszM1Geffabk5GRJ0t69e3XVVVdp27ZtGjx4sGbPnq158+bpn//8py699FJJ0ubNm5WVlaXa2lpZrVbf/nr37q1f/OIXmjx5ciuOFkBr6WB0AQBwIVdffbVGjBihtLQ0ZWRkKD09XT/84Q915swZHT58WCNGjDjvdh9//LGSk5N9QUeSUlNT1alTJ3388ccaPHiwJCklJcUXdCRp586dOnXqlBISEvz2969//UtVVVWtMEIAbYGwAyBkhYeHq6SkROXl5Vq3bp2ee+45/epXv1JpaWlQ9h8TE+O3fOrUKXXp0kUbN248p2+nTp2CckwAbY+wAyCkWSwWXX/99br++uv1yCOPKCUlRSUlJerRo4dKS0t10003nbPN
lVdeqYMHD+rgwYN+p7GOHz+u1NTUCx7rmmuukcvlUocOHdSjR4/WGhKANkbYARCy3n//fZWWlio9PV2JiYl6//339fnnn+vKK6/U7Nmzdd999ykxMVGZmZk6efKktmzZoqlTp2rkyJFKS0vTuHHjtHDhQjU0NOhnP/uZ/u3f/k2DBg264PFGjhwpp9Op2267TfPnz9f3v/99HT58WG+//bbGjh37jdsCCF2EHQAhy263a9OmTVq4cKHq6uqUkpKip59+WpmZmZIkt9utBQsW6Oc//7k6d+6sH/7wh5K+nA168803NXXqVA0bNkxhYWEaNWqUnnvuuW88nsVi0Zo1a/SrX/1K99xzjz7//HM5HA4NGzZMSUlJrT5eAK2Du7EAAICp8ZwdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgav8fiX6zTMOoTtIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='score', y='avg_chunk_len', data=pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "7c23c763-f6cf-43b1-9659-8617ce1ddb8e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:07:04.944329Z",
     "iopub.status.busy": "2024-12-04T14:07:04.944197Z",
     "iopub.status.idle": "2024-12-04T14:07:05.191986Z",
     "shell.execute_reply": "2024-12-04T14:07:05.191536Z",
     "shell.execute_reply.started": "2024-12-04T14:07:04.944315Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='max_chunk_len'>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuiElEQVR4nO3df1TUdaL/8dcAwuCPgUQd/AHU9kspTPMHDG7tpii51mZpWpBxza2rF38E/fRUataq186SVprXvW665zi51q27+YOS2LRWCAmzLMtq614gBFoMUE8DOjPfP/o63ya1/A4Dn+HD83HOnOO8P5+Zz+sTMbzm89Pi9Xq9AgAAMKkwowMAAAC0J8oOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwtQijA4QCj8ejmpoa9erVSxaLxeg4AADgPHi9Xh07dkwDBgxQWNi5t99QdiTV1NQoISHB6BgAACAAVVVVGjRo0DmnU3Yk9erVS9L3/7FsNpvBaQAAwPlobm5WQkKC7+/4uVB2JN+uK5vNRtkBAKCT+blDUDhAGQAAmBplBwAAmBplBwAAmBplBwAAmBplBwAAmBplBwAAmBplBwAAmBplBwAAmBplBwAAmBplBwBgGiUlJZo+fbpKSkqMjoIQQtkBAJiCy+VSQUGB6urqVFBQIJfLZXQkhAjKDgDAFDZv3qyGhgZJUkNDg5xOp8GJECooOwCATq+6ulpOp1Ner1eS5PV65XQ6VV1dbXAyhALKDgCgU/N6vVq9evU5x08XIHRdlB0AQKdWWVmp8vJyud1uv3G3263y8nJVVlYalAyhgrIDAOjUEhMTNWrUKIWHh/uNh4eHa/To0UpMTDQoGUIFZQcA0KlZLBYtWLDgnOMWi8WAVAgllB0AQKc3aNAgZWVl+YqNxWJRVlaWBg4caHAyhALKDgDAFLKzsxUXFydJ6tOnj7KysgxOhFBB2QEAmILValV+fr7sdrvy8vJktVqNjoQQEWF0AAAAgiU9PV3p6elGx0CIYcsOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOAAAwNcoOupySkhJNnz5dJSUlRkcBEGQbNmzQ2LFjtWHDBqOjIIRQdtCluFwuFRQUqK6uTgUFBXK5XEZHAhAkjY2N2rx5szwejzZv3qzGxkajIyFEUHbQpWzevFkNDQ2SpIaGBjmdToMTAQiWxx57TB6PR5Lk8Xi0aNEigxMhVFB20GVUV1fL6XTK6/VKkrxer5xOp6qrqw1OBqCt3nvvPR08eNBv7MMPP9R7771nUCKEEsoOugSv16vVq1efc/x0AQLQ+Xg8Hi1duvSs05YuXerb2oOui7KDLqGyslLl5eVyu91+4263W+Xl5aqsrDQoGYC2KisrU3Nz81mnNTc3q6ysrIMTIdRQdtAlJCYmatSoUQoPD/cbDw8P1+jRo5WYmGhQMgBtlZqaKpvNdtZpMTExSk1N7eBECDWUHXQJFotFCxYsOOe4xWIxIBWAYAgLCzvnwciLFy9WWBh/6ro6/g9AlzFo0CBlZWX5io3FYlFWVpYGDhxocDIAbTVy5EilpKT4jQ0dOlRXX321QYkQSig76FKys7MVFxcnSerTp4+ysrIMTgQgWJ544gnfVpywsLBzHrSMroeygy7FarUqPz9fdrtdeXl5slqtRkcCECSxsbHKzs5WWFiYsrOzFRsba3QkhAiLl3Nu1dzcrJiYGDU1NZ3zIDcAABBazvfvN1t2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqVF2AACAqRledr7++mvdcccdiouLU3R0tFJSUvTee+/5pnu9Xi1atEj9+/dXdHS0MjIy9Pnnn/u9x9GjR5WdnS2bzabY2FjNmjVLx48f7+hV
AQAAIcjQsvPtt99qzJgx6tatmwoLC3Xo0CH94Q9/0AUXXOCbZ+XKlXrmmWe0bt06lZWVqUePHsrMzJTL5fLNk52drY8//lhFRUXavn273n77bd1zzz1GrBIAAAgxFq/X6zVq4Q8//LD27t2rd95556zTvV6vBgwYoPvuu0/333+/JKmpqUl2u10bN27Ubbfdpk8++UTJyckqLy/XyJEjJUmvv/66fvOb36i6uloDBgw4431bWlrU0tLie97c3KyEhAQ1NTXJZrO1w5oCAIBga25uVkxMzM/+/TZ0y85rr72mkSNH6tZbb1W/fv00fPhw/fGPf/RN/+qrr1RbW6uMjAzfWExMjFJTU1VaWipJKi0tVWxsrK/oSFJGRobCwsJUVlZ21uUuX75cMTExvkdCQkI7rSEAADCaoWXnyy+/1PPPP69LL71Ub7zxhubMmaP58+dr06ZNkqTa2lpJkt1u93ud3W73TautrVW/fv38pkdERKh3796+eX5s4cKFampq8j2qqqqCvWoAACBERBi5cI/Ho5EjR2rZsmWSpOHDh+ujjz7SunXrlJOT027LjYqKUlRUVLu9PwAACB2Gbtnp37+/kpOT/caGDBmiyspKSVJ8fLwkqa6uzm+euro637T4+HjV19f7TT916pSOHj3qmwcAAHRdhpadMWPG6PDhw35jn332mZKSkiRJF110keLj41VcXOyb3tzcrLKyMjkcDkmSw+FQY2OjKioqfPP87W9/k8fjUWpqagesBQAACGWG7sbKy8tTenq6li1bpmnTpmnfvn1av3691q9fL0myWCy699579eSTT+rSSy/VRRddpMcee0wDBgzQ5MmTJX2/Jej666/X3XffrXXr1unkyZOaO3eubrvttrOeiQUAALoWQ089l6Tt27dr4cKF+vzzz3XRRRcpPz9fd999t2+61+vV4sWLtX79ejU2NuqXv/yl1q5dq8suu8w3z9GjRzV37lxt27ZNYWFhmjJlip555hn17NnzvDKc76lrAAAgdJzv32/Dy04ooOwAAND5dIrr7AAAALQ3yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADA1yg4AADC1CKMDAACM5fV65XK5jI7RZl6vVy0tLZKkqKgoWSwWgxO1jdVq7fTrECooOwDQxblcLk2cONHoGPiRwsJCRUdHGx3DFNiNBQAATI0tOwDQxVmtVhUWFhodo81cLpduvvlmSdKrr74qq9VqcKK26ez5QwllBwC6OIvFYrrdJVar1XTrhMCxGwsAAJgaZQcAAJgaZQcAAJiaoWVnyZIlslgsfo/Bgwf7prtcLuXm5iouLk49e/bUlClTVFdX5/celZWVmjRpkrp3765+/frpgQce0KlTpzp6VQAAQIgy/ADlK664Qm+++abveUTE/4uUl5enHTt26KWXXlJMTIzmzp2rW265RXv37pUkud1uTZo0SfHx8SopKdGRI0d05513qlu3blq2bFmHrwsAAAg9hpediIgIxcfHnzHe1NSkDRs2yOl0auzYsZKkF154QUOGDNG7776rtLQ07dq1S4cOHdKbb74pu92uYcOG6YknntBDDz2kJUuWKDIysqNXBwAAhBjDy87nn3+uAQMG
yGq1yuFwaPny5UpMTFRFRYVOnjypjIwM37yDBw9WYmKiSktLlZaWptLSUqWkpMhut/vmyczM1Jw5c/Txxx9r+PDhZ11mS0uL75LiktTc3Nx+K2gSXE4+NHE5eQD4eYaWndTUVG3cuFGXX365jhw5oscff1zXXHONPvroI9XW1ioyMlKxsbF+r7Hb7aqtrZUk1dbW+hWd09NPTzuX5cuX6/HHHw/uypgcl5MPTVxOHgB+XsBlp7GxUfv27VN9fb08Ho/ftDvvvPO83uOHfzyHDh2q1NRUJSUlaevWre36Ab5w4ULl5+f7njc3NyshIaHdlgcAAIwTUNnZtm2bsrOzdfz4cdlsNr/N6BaL5bzLzo/Fxsbqsssu0xdffKHx48ertbVVjY2Nflt36urqfMf4xMfHa9++fX7vcfpsrbMdB3RaVFSUoqKiAsrYVXE5+dDU2fMDQEcIqOzcd999uuuuu7Rs2TJ17949aGGOHz+uf/zjH5oxY4ZGjBihbt26qbi4WFOmTJEkHT58WJWVlXI4HJIkh8Oh3//+96qvr1e/fv0kSUVFRbLZbEpOTg5aLnA5eQBA5xVQ2fn66681f/78Nhed+++/XzfeeKOSkpJUU1OjxYsXKzw8XLfffrtiYmI0a9Ys5efnq3fv3rLZbJo3b54cDofS0tIkSRMmTFBycrJmzJihlStXqra2Vo8++qhyc3PZcgMAACQFWHYyMzP13nvv6Re/+EWbFl5dXa3bb79dDQ0N6tu3r375y1/q3XffVd++fSVJTz/9tMLCwjRlyhS1tLQoMzNTa9eu9b0+PDxc27dv15w5c+RwONSjRw/l5ORo6dKlbcoFAADMI6CyM2nSJD3wwAM6dOiQUlJS1K1bN7/pv/3tb8/rfbZs2fKT061Wq9asWaM1a9acc56kpCTt3LnzvJYHAAC6noDKzt133y1JZ92CYrFY5Ha725YKAAAgSAIqOz8+1RwAACBUtfmigi6Xi9Nff4ZZrj5sBj/8OfAzCR1cCRpAewqo7Ljdbi1btkzr1q1TXV2dPvvsM/3iF7/QY489pgsvvFCzZs0Kds5OjasPh6bT19uB8TrzlaD5MhM6+DITmkLhy0xAZef3v/+9Nm3apJUrV/qO35GkK6+8UqtWraLsAOgy+DITmvgyEzpC4ctMQGXnz3/+s9avX69x48Zp9uzZvvGrrrpKn376adDCmdHxYbfLG2b4/Ve7Lq9X8pz6/t9hERK7Tgxj8ZxSzwMvGh0DQBcQ8EUFL7nkkjPGPR6PTp482eZQZuYNi5DCu/38jGhHkUYHgCSv0QHawXO/PKqocDOuWefg9Uqt//f8mcgwvssYqcVt0dy/9zY6hk9AZSc5OVnvvPOOkpKS/MZffvllDR8+PCjBAKCziQr3Kirc6BRdG6fLhIrQKv0BlZ1FixYpJydHX3/9tTwej1555RUdPnxYf/7zn7V9+/ZgZwQAAAhYQGXnpptu0rZt27R06VL16NFDixYt0tVXX61t27Zp/Pjxwc5oLm528wGS+F0A0GECPlL2mmuuUVFRUTCzdAm9PvjpW2QAAIDgCjM6AAAAQHs67y07F1xwwXlfFOjo0aMBBzK7Y1fdxtlYgCS5T7KlE0CHOO+ys2rVqnaM0YWEd6PsACbVwj2QAUmh97tw3mUnJyfn//vNV6xYodmzZys2Nvb/+7UA0NnM/Xuc0REAnEW7HrOzbNkydmkBAABDtet9C7ze0LqoEAC0p+d+2cBFBQF9vxsrlLZ0cpMmAAiSqHBRdoAQxKnnAADA1Cg7AADA1Cg7AADA1IJedr777jvfv6+55hpFR0cHexEAAADnLaCyM3/+/LOOnzhxQr/5zW98z3fu3Kn+/fsHlgwAACAIAio7O3bs0OLFi/3GTpw4oeuvv16nTp0KSjAAAIBgCOjU8127dumaa67RBRdcoHvvvVfHjh1TZmamIiIiVFhYGOyMAAAAAQuo7Fx88cV6/fXXdd111yksLEwvvviioqKi
tGPHDvXo0SPYGQEAAAIW8EUFhw4dqu3bt2v8+PFKTU3V9u3bORgZAACEnPMuO8OHD5fFYjljPCoqSjU1NRozZoxvbP/+/cFJBwAA0EbnXXYmT57cjjEAAADax3mXnR+ffQUAANAZtOlGoK2traqvr5fH4/EbT0xMbFMoAACAYAmo7Hz22WeaNWuWSkpK/Ma9Xq8sFovcbndQwgEAALRVQGVn5syZioiI0Pbt29W/f/+zHrgMAAAQCgIqOwcOHFBFRYUGDx4c7DwAAABBFdDtIpKTk/XPf/4z2FkAAACCLqCy8+///u968MEHtXv3bjU0NKi5udnvAQAAECoC2o2VkZEhSRo3bpzfOAcoAwCAUBNQ2XnrrbeCnQMAAKBdBFR2fvWrXwU7BwAAQLsIqOy8/fbbPzn92muvDSgMAABAsAVUdn7961+fMfbDa+1wzA4AAAgVAZ2N9e233/o96uvr9frrr2vUqFHatWtXsDMCAAAELKAtOzExMWeMjR8/XpGRkcrPz1dFRUWbgwEAAARDQFt2zsVut+vw4cPBfEsAAIA2CWjLzocffuj33Ov16siRI1qxYoWGDRsWjFwAAABBEVDZGTZsmCwWi7xer994Wlqa/vSnPwUlGAAAQDAEVHa++uorv+dhYWHq27evrFZrUEIBAAAES0BlJykpKdg5AAAA2kVAZUeSiouLVVxcrPr6enk8Hr9p7MoCAAChIqCy8/jjj2vp0qUaOXKk+vfv73dBQQAAgFASUNlZt26dNm7cqBkzZgQ7DwAAQFAFdJ2d1tZWpaenBzsLAABA0AVUdn73u9/J6XQGOwsAAEDQnfdurPz8fN+/PR6P1q9frzfffFNDhw5Vt27d/OYtKCgIXkIAAIA2OO+y8/777/s9P32l5I8++shvnIOVAQBAKDnvsvPWW2+1Zw4AAIB2EdAxO01NTTp69OgZ40ePHlVzc3ObQwEAAARLQGXntttu05YtW84Y37p1q2677bY2hwIAAAiWgMpOWVmZrrvuujPGf/3rX6usrKzNoQAAAIIloLLT0tKiU6dOnTF+8uRJfffdd20OBQAAECwBlZ3Ro0dr/fr1Z4yvW7dOI0aMaHMoAACAYAnodhFPPvmkMjIy9MEHH2jcuHGSvr8xaHl5uXbt2hXUgAAAAG0RUNkZM2aMSktL9dRTT2nr1q2Kjo7W0KFDtWHDBl166aXBzggAnUKL2yLJa3SMLsvrlVo93/87Mkzism/G+f53IXQEVHak7y8quHnz5p+cZ8WKFZo9e7ZiY2PP6z1XrFihhQsXasGCBVq1apUkyeVy6b777tOWLVvU0tKizMxMrV27Vna73fe6yspKzZkzR2+99ZZ69uypnJwcLV++XBERAa9eu7F4TvFRaCSvV/L83+PNwiL4NDSQxXPmcX+d3dy/9zY6AoCzaNc2sGzZMk2bNu28yk55ebn+4z/+Q0OHDvUbz8vL044dO/TSSy8pJiZGc+fO1S233KK9e/dKktxutyZNmqT4+HiVlJToyJEjuvPOO9WtWzctW7asPVarTXoeeNHoCAAAdCntWna83vPbhnH8+HFlZ2frj3/8o5588knfeFNTkzZs2CCn06mxY8dKkl544QUNGTJE7777rtLS0rRr1y4dOnRIb775pux2u4YNG6YnnnhCDz30kJYsWaLIyMh2WTcAkCSr1arCwkKjY0Df7wm4+eabJUmvvvqqrFarwYkgKSR+DiGxnyc3N1eTJk1SRkaGX9mpqKjQyZMnlZGR4RsbPHiwEhMTVVpaqrS0NJWWliolJcVvt1ZmZqbmzJmjjz/+WMOHDz9jeS0tLWppafE9b++rPvNhGDr4MAxNnfnnYLFYFB0dbXQM/IjVauXnAh/Dy86WLVu0f/9+lZeXnzGttrZWkZGRZ+wGs9vtqq2t9c3zw6JzevrpaWezfPlyPf7440FIf374MAxNfBgCQNcQ0HV2gqWqqkoLFizQ5s2bO/Sb3cKFC9XU1OR7VFVVddiy
AQBAxzK07FRUVKi+vl5XX321IiIiFBERoT179uiZZ55RRESE7Ha7Wltb1djY6Pe6uro6xcfHS5Li4+NVV1d3xvTT084mKipKNpvN7wEAAMypXcvONddc85O7CcaNG6eDBw/qwIEDvsfIkSOVnZ3t+3e3bt1UXFzse83hw4dVWVkph8MhSXI4HDp48KDq6+t98xQVFclmsyk5Obn9Vg4AAHQKAZWdjRs3nnX81KlTWrhwoe/5zp071b9//3O+T69evXTllVf6PXr06KG4uDhdeeWViomJ0axZs5Sfn6+33npLFRUVmjlzphwOh9LS0iRJEyZMUHJysmbMmKEPPvhAb7zxhh599FHl5uYqKioqkNUDAAAmElDZmT9/vm699VZ9++23vrHDhw8rNTVVL74Y3OvIPP3007rhhhs0ZcoUXXvttYqPj9crr7zimx4eHq7t27crPDxcDodDd9xxh+68804tXbo0qDkAAEDnFNDZWO+//77uuOMOpaSk6IUXXtBnn32mBx98UJMnT9batWvbFGj37t1+z61Wq9asWaM1a9ac8zVJSUnauXNnm5YLAADMKaCyc/HFF2vv3r269957df311ys8PFybNm3S7bffHux8AAAAbRLwAco7duzQli1b5HA4FBsbqw0bNqimpiaY2QAAANosoLLzr//6r7r11lv10EMP6Z133tGHH36oyMhIpaSkaOvWrcHOCAAAELCAdmPt3btXZWVluuqqqyR9fz2bnTt3as2aNbrrrrs0bdq0oIYEAAAIVEBlp6Ki4qyndefm5vrdxwoAAMBoAe3G+qnr11x++eUBhwEAAAi2gG8E+vLLL2vr1q2qrKxUa2ur37T9+/e3ORgAAEAwBLRl55lnntHMmTNlt9v1/vvva/To0YqLi9OXX36piRMnBjsjAABAwAIqO2vXrtX69ev17LPPKjIyUg8++KCKioo0f/58NTU1BTsjAABAwAIqO5WVlUpPT5ckRUdH69ixY5KkGTNmBP12EQAAAG0RUNmJj4/X0aNHJUmJiYl69913JUlfffWVvF5v8NIBAAC0UUBlZ+zYsXrttdckSTNnzlReXp7Gjx+v6dOn6+abbw5qQAAAgLYI6Gys9evXy+PxSPr+2jp9+vTR3r179dvf/lazZ88OakAAAIC2CKjshIWFqbW1Vfv371d9fb2io6N9FxN8/fXXdeONNwY1JIzn9XrlcrmMjtFmP1wHM6yP1WqVxWIxOgYAhLSAys7rr7+uGTNmqKGh4YxpFotFbre7zcEQWlwul+kuK2CGXa6FhYWKjo42OgYAhLSAjtmZN2+epk2bpiNHjsjj8fg9KDoAACCUBLRlp66uTvn5+bLb7cHOgxBltVpVWFhodIw283q9amlpkfT9bU86+y4gq9VqdAQACHkBlZ2pU6dq9+7duvjii4OdByHKYrGYZndJ9+7djY4AAOhAAZWd5557TrfeeqveeecdpaSkqFu3bn7T58+fH5RwAAAAbRVQ2XnxxRe1a9cuWa1W7d69229XgMVioewAAICQEVDZeeSRR/T444/r4YcfVlhYQMc4AwAAdIiAmkpra6umT59O0QEAACEvoLaSk5Ojv/zlL8HOAgAAEHQB7cZyu91auXKl3njjDQ0dOvSMA5QLCgqCEg4AAKCtAio7Bw8e1PDhwyVJH330kd+0zn7dEgAAYC4BlZ233nor2DkAAADaBUcYAwAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAUwvodhEAAPPwer1yuVxGx2izH66DGdbHarVyv8kgoewAQBfncrk0ceJEo2ME1c0332x0hDYrLCxUdHS00TFMgd1YAADA1NiyAwBdnNVqVWFhodEx2szr9aqlpUWSFBUV1el3AVmtVqMjmAZlBwC6OIvFYprdJd27dzc6AkIQu7EAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYA
AICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpUXYAAICpGVp2nn/+eQ0dOlQ2m002m00Oh0OFhYW+6S6XS7m5uYqLi1PPnj01ZcoU1dXV+b1HZWWlJk2apO7du6tfv3564IEHdOrUqY5eFQAAEKIMLTuDBg3SihUrVFFRoffee09jx47VTTfdpI8//liSlJeXp23btumll17Snj17VFNTo1tuucX3erfbrUmTJqm1tVUlJSXatGmTNm7cqEWLFhm1SgAAIMRYvF6v1+gQP9S7d2899dRTmjp1qvr27Sun06mpU6dKkj799FMNGTJEpaWlSktLU2FhoW644QbV1NTIbrdLktatW6eHHnpI33zzjSIjI89rmc3NzYqJiVFTU5NsNlu7rRsAAAie8/37HTLH7Ljdbm3ZskUnTpyQw+FQRUWFTp48qYyMDN88gwcPVmJiokpLSyVJpaWlSklJ8RUdScrMzFRzc7Nv69DZtLS0qLm52e8BAADMyfCyc/DgQfXs2VNRUVGaPXu2Xn31VSUnJ6u2tlaRkZGKjY31m99ut6u2tlaSVFtb61d0Tk8/Pe1cli9frpiYGN8jISEhuCsFAABChuFl5/LLL9eBAwdUVlamOXPmKCcnR4cOHWrXZS5cuFBNTU2+R1VVVbsuDwAAGCfC6ACRkZG65JJLJEkjRoxQeXm5Vq9erenTp6u1tVWNjY1+W3fq6uoUHx8vSYqPj9e+ffv83u/02Vqn5zmbqKgoRUVFBXlNAABAKDJ8y86PeTwetbS0aMSIEerWrZuKi4t90w4fPqzKyko5HA5JksPh0MGDB1VfX++bp6ioSDabTcnJyR2eHQAAhB5Dt+wsXLhQEydOVGJioo4dOyan06ndu3frjTfeUExMjGbNmqX8/Hz17t1bNptN8+bNk8PhUFpamiRpwoQJSk5O1owZM7Ry5UrV1tbq0UcfVW5uLltuAACAJIPLTn19ve68804dOXJEMTExGjp0qN544w2NHz9ekvT0008rLCxMU6ZMUUtLizIzM7V27Vrf68PDw7V9+3bNmTNHDodDPXr0UE5OjpYuXWrUKgEAgBATctfZMQLX2QEAoPPpdNfZAQAAaA+UHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQAAYGqUHQCAaZSUlGj69OkqKSkxOgpCCGUHAGAKLpdLBQUFqqurU0FBgVwul9GRECIoOwAAU9i8ebMaGhokSQ0NDXI6nQYnQqig7AAAOr3q6mo5nU55vV5JktfrldPpVHV1tcHJEAooOwCATs3r9Wr16tXnHD9dgNB1UXYAAJ1aZWWlysvL5Xa7/cbdbrfKy8tVWVlpUDKECsoOAKBTS0xM1KhRoxQeHu43Hh4ertGjRysxMdGgZAgVlB0AQKdmsVi0YMGCc45bLBYDUiGUUHYAAJ3eoEGDNG3aNL+xadOmaeDAgQYlQiih7AAAAFOj7AAAOr3q6mpt3brVb2zr1q2ceg5JlB0AQCfHqef4OZQdAECnxqnn+DmUHQBAp8ap5/g5lB0AQKfGqef4OZQdAECnN2jQIGVlZfmKjcViUVZWFqeeQxJlBwBgEtnZ2YqLi5Mk9enTR1lZWQYnQqig7AAATMFqtSo/P192u115eXmyWq1GR0KIiDA6AAAAwZKenq709HSjYyDEsGUHAACYGmUHAACYGmUHAGAaJSUlmj59ukpKSoyOghBC2UGXw4chYE4ul0sFBQWqq6tTQUGBXC6X0ZEQIig76FL4MATMa/PmzWpoaJAkNTQ0yOl0GpwIoYKygy6FD0PAnKqrq+V0On03/fR6vXI6ndz1HJIoO+hC+DAEzOn03c09Ho/fuNvt5q7n
kETZQRdx+sPwXON8GAKd1+m7nv/499jr9XLXc0ii7KCLOP1h6Ha7/cbdbjcfhkAnl5CQIJvNdtZpNptNCQkJHZwIoYaygy4hMTFRo0aNUnh4uN94eHi4Ro8ercTERIOSAWirqqoqNTc3n3Vac3OzqqqqOjgRQg1lB12CxWLRggULzjl++k7JADqf019mfvx7bLFY+DIDSZQddCGDBg1SVlaW7wPRYrEoKytLAwcONDgZgLY415eWsLAwvsxAEmUHXUx2drbi4uIkSX369FFWVpbBiQAEw6BBg5Sdne03lp2dzZcZSKLsoIuxWq3Kz8+X3W5XXl6erFar0ZEABEl2drb69OkjSerbty9fZuBj8XLOrZqbmxUTE6OmpqZzHtEPAAh9JSUlWr16tRYsWKD09HSj46Cdne/f74gOzAQAQLtKT0+n5OAM7MYCAACmRtkBAACmRtkBAACmRtkBAACmRtkBAACmRtkBAACmRtkBAACmRtkBAACmRtkBAACmRtkBAJhGSUmJpk+frpKSEqOjIIRQdgAApuByuVRQUKC6ujoVFBTI5XIZHQkhgrIDADCFzZs3q6GhQZLU0NAgp9NpcCKECsoOAKDTq66ultPplNfrlSR5vV45nU5VV1cbnAyhgLIDAOjUvF6vVq9efc7x0wUIXZehZWf58uUaNWqUevXqpX79+mny5Mk6fPiw3zwul0u5ubmKi4tTz549NWXKFNXV1fnNU1lZqUmTJql79+7q16+fHnjgAZ06daojVwUAYJDKykqVl5fL7Xb7jbvdbpWXl6uystKgZAgVhpadPXv2KDc3V++++66Kiop08uRJTZgwQSdOnPDNk5eXp23btumll17Snj17VFNTo1tuucU33e12a9KkSWptbVVJSYk2bdqkjRs3atGiRUasEgCggyUmJmrUqFEKDw/3Gw8PD9fo0aOVmJhoUDKECos3hLbvffPNN+rXr5/27Nmja6+9Vk1NTerbt6+cTqemTp0qSfr00081ZMgQlZaWKi0tTYWFhbrhhhtUU1Mju90uSVq3bp0eeughffPNN4qMjPzZ5TY3NysmJkZNTU2y2Wztuo4AgOCrrq5WTk6O39adiIgIbdq0SQMHDjQwGdrT+f79DqljdpqamiRJvXv3liRVVFTo5MmTysjI8M0zePBgJSYmqrS0VJJUWlqqlJQUX9GRpMzMTDU3N+vjjz8+63JaWlrU3Nzs9wAAdF6DBg1SVlaWLBaLJMlisSgrK4uiA0khVHY8Ho/uvfdejRkzRldeeaUkqba2VpGRkYqNjfWb1263q7a21jfPD4vO6emnp53N8uXLFRMT43skJCQEeW0AAB0tOztbcXFxkqQ+ffooKyvL4EQIFSFTdnJzc/XRRx9py5Yt7b6shQsXqqmpyfeoqqpq92UCANqX1WpVfn6+7Ha78vLyZLVajY6EEBFhdABJmjt3rrZv3663335bgwYN8o3Hx8ertbVVjY2Nflt36urqFB8f75tn3759fu93+myt0/P8WFRUlKKiooK8FgAAo6Wnpys9Pd3oGAgxhm7Z8Xq9mjt3rl599VX97W9/00UXXeQ3fcSIEerWrZuKi4t9Y4cPH1ZlZaUcDockyeFw6ODBg6qvr/fNU1RUJJvNpuTk5I5ZEQAAELIM3bKTm5srp9Opv/71r+rVq5fvGJuYmBhFR0crJiZGs2bNUn5+vnr37i2bzaZ58+bJ4XAoLS1NkjRhwgQlJydrxowZWrlypWpra/Xoo48qNzeXrTcAAMDYU89PHzX/Yy+88IL+5V/+RdL3FxW877779OKLL6qlpUWZmZlau3at3y6q//3f/9WcOXO0e/du9ejRQzk5OVqxYoUiIs6vy3HqOQAAnc/5/v0OqevsGIWyAwBA59Mpr7MDAAAQbJQdAABgapQdAABgapQdAABgapQdAABgaiFxBWWjnT4hjRuCAgDQeZz+u/1zJ5ZTdiQdO3ZMkrghKAAAndCxY8cUExNzzulcZ0ff
33G9pqZGvXr1OueFDmEezc3NSkhIUFVVFddVAkyG3++uxev16tixYxowYIDCws59ZA5bdiSFhYX53YAUXYPNZuPDEDApfr+7jp/aonMaBygDAABTo+wAAABTo+ygy4mKitLixYsVFRVldBQAQcbvN86GA5QBAICpsWUHAACYGmUHAACYGmUHAACYGmUHAACYGmUHXcqaNWt04YUXymq1KjU1Vfv27TM6EoAgePvtt3XjjTdqwIABslgs+u///m+jIyGEUHbQZfzlL39Rfn6+Fi9erP379+uqq65SZmam6uvrjY4GoI1OnDihq666SmvWrDE6CkIQp56jy0hNTdWoUaP03HPPSfr+nmgJCQmaN2+eHn74YYPTAQgWi8WiV199VZMnTzY6CkIEW3bQJbS2tqqiokIZGRm+sbCwMGVkZKi0tNTAZACA9kbZQZfwz3/+U263W3a73W/cbrertrbWoFQAgI5A2QEAAKZG2UGX0KdPH4WHh6uurs5vvK6uTvHx8QalAgB0BMoOuoTIyEiNGDFCxcXFvjGPx6Pi4mI5HA4DkwEA2luE0QGAjpKfn6+cnByNHDlSo0eP1qpVq3TixAnNnDnT6GgA2uj48eP64osvfM+/+uorHThwQL1791ZiYqKByRAKOPUcXcpzzz2np556SrW1tRo2bJieeeYZpaamGh0LQBvt3r1b11133RnjOTk52rhxY8cHQkih7AAAAFPjmB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AAGBqlB0AkNTa2mp0BADthLIDIKS9/PLLSklJUXR0tOLi4pSRkaETJ05Ikv70pz/piiuuUFRUlPr376+5c+f6XldZWambbrpJPXv2lM1m07Rp01RXV+ebvmTJEg0bNkz/+Z//qYsuukhWq1WS1NjYqN/97nfq27evbDabxo4dqw8++KBjVxpAUFF2AISsI0eO6Pbbb9ddd92lTz75RLt379Ytt9wir9er559/Xrm5ubrnnnt08OBBvfbaa7rkkkskSR6PRzfddJOOHj2qPXv2qKioSF9++aWmT5/u9/5ffPGF/uu//kuvvPKKDhw4IEm69dZbVV9fr8LCQlVUVOjqq6/WuHHjdPTo0Y5efQBBwl3PAYSs/fv3a8SIEfqf//kfJSUl+U0bOHCgZs6cqSeffPKM1xUVFWnixIn66quvlJCQIEk6dOiQrrjiCu3bt0+jRo3SkiVLtGzZMn399dfq27evJOnvf/+7Jk2apPr6ekVFRfne75JLLtGDDz6oe+65px3XFkB7iTA6AACcy1VXXaVx48YpJSVFmZmZmjBhgqZOnaqTJ0+qpqZG48aNO+vrPvnkEyUkJPiKjiQlJycrNjZWn3zyiUaNGiVJSkpK8hUdSfrggw90/PhxxcXF+b3fd999p3/84x/tsIYAOgJlB0DICg8PV1FRkUpKSrRr1y49++yzeuSRR1RcXByU9+/Ro4ff8+PHj6t///7avXv3GfPGxsYGZZkAOh5lB0BIs1gsGjNmjMaMGaNFixYpKSlJRUVFuvDCC1VcXKzrrrvujNcMGTJEVVVVqqqq8tuN1djYqOTk5HMu6+qrr1Ztba0iIiJ04YUXttcqAehglB0AIausrEzFxcWaMGGC+vXrp7KyMn3zzTcaMmSIlixZotmzZ6tfv36aOHGijh07pr1792revHnKyMhQSkqKsrOztWrVKp06dUr/9m//pl/96lcaOXLkOZeXkZEhh8OhyZMna+XKlbrssstUU1OjHTt26Oabb/7J1wIIXZQdACHLZrPp7bff1qpVq9Tc3KykpCT94Q9/0MSJEyVJLpdLTz/9tO6//3716dNHU6dOlfT91qC//vWvmjdvnq699lqFhYXp+uuv17PPPvuTy7NYLNq5c6ceeeQRzZw5U998843i4+N17bXXym63t/v6AmgfnI0FAABMjevsAAAAU6PsAAAAU6Ps
AAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU6PsAAAAU/s/YTKdlhUb10sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of max_chunk_len for each score value in pred_df\n",
    "sns.boxplot(data=pred_df, x='score', y='max_chunk_len')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25f46ec3-2847-4f84-ba82-de05f8d851ac",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-09T14:24:05.210584Z",
     "iopub.status.busy": "2024-12-09T14:24:05.210235Z",
     "iopub.status.idle": "2024-12-09T14:24:05.215518Z",
     "shell.execute_reply": "2024-12-09T14:24:05.215071Z",
     "shell.execute_reply.started": "2024-12-09T14:24:05.210567Z"
    }
   },
   "source": [
    "从最终答案的打分来看，结果正常了很多；从回答正确/错误的问题所对应的上下文长度来看，也都比较正常。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebd5a509-b20e-4b00-b490-6777b3be1c68",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 1058.563616,
   "end_time": "2024-11-23T14:46:37.625874",
   "environment_variables": {},
   "exception": null,
   "input_path": "13_contextual_embeddings.ipynb",
   "output_path": "run_13_contextual_embeddings.ipynb",
   "parameters": {},
   "start_time": "2024-11-23T14:28:59.062258",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "0cd8c168767249f2a5fa412173f6e751": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ce1d1d9d86c40d9839877ff95734491",
       "max": 100,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_231702cf4d79477f9d5548665a1b18fe",
       "tabbable": null,
       "tooltip": null,
       "value": 100
      }
     },
     "2133bb8d85d34b8db112b4408ad60320": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "231702cf4d79477f9d5548665a1b18fe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "23b1ad9c0f9c46c888da66e85c90eb84": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "24e6eadc3dc940ecabf30dd1a3c6d1f3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_fa4bddf2c33241b5bf918054518f128f",
       "max": 52,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_edc33e82be8f41eba6a18a0ef074ab7a",
       "tabbable": null,
       "tooltip": null,
       "value": 52
      }
     },
     "2f60367b1c8941e2bf71661c33969ae8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3865f25c78aa46f29a25d807205281c3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3d0b06deaa654b989eece8cde06fa0f8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "3f8ceda83287475b97608e42f5f6782f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4881e496f1c84fe29ce9ebebaddfb3c2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bd096d5d219a467786a85cfe1613fedd",
        "IPY_MODEL_24e6eadc3dc940ecabf30dd1a3c6d1f3",
        "IPY_MODEL_bc2b8104b4244d8cacedeb95e800d91c"
       ],
       "layout": "IPY_MODEL_6b9a8e43c1c342dba500a14e7149b600",
       "tabbable": null,
       "tooltip": null
      }
     },
     "5ce1d1d9d86c40d9839877ff95734491": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "5ddb08be5cc64c9ab40a1d62a21763a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_86283159049d48b1adcfb2de2d404d4d",
       "placeholder": "​",
       "style": "IPY_MODEL_2133bb8d85d34b8db112b4408ad60320",
       "tabbable": null,
       "tooltip": null,
       "value": " 100/100 [08:34&lt;00:00, 10.01s/it]"
      }
     },
     "5ef9d83ccad1471f85335900a24a8553": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6b9a8e43c1c342dba500a14e7149b600": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "816a079a8c804fbfa9b9a74f941abea8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bcc69ec5db1b4aab977807284c9290e7",
        "IPY_MODEL_0cd8c168767249f2a5fa412173f6e751",
        "IPY_MODEL_5ddb08be5cc64c9ab40a1d62a21763a5"
       ],
       "layout": "IPY_MODEL_d1178c6858284f788a80b5f2a14fd0b7",
       "tabbable": null,
       "tooltip": null
      }
     },
     "86283159049d48b1adcfb2de2d404d4d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "8ff8262c56604119883f4a5f13bb74ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ef9d83ccad1471f85335900a24a8553",
       "placeholder": "​",
       "style": "IPY_MODEL_e89e77133c344fc48c1d62f5a607ec93",
       "tabbable": null,
       "tooltip": null,
       "value": " 8/8 [00:18&lt;00:00,  2.27s/it]"
      }
     },
     "9189a076554543aaa6f5ee04e40dbe1b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "988e6697a2af486fadeaf0b84347b565": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_e1aae4c55cb64f379e74f15357275628",
        "IPY_MODEL_fd9e23198ca1489a9773fda3510bf857",
        "IPY_MODEL_8ff8262c56604119883f4a5f13bb74ab"
       ],
       "layout": "IPY_MODEL_d2ee15001d2244529f7e47d3333c0f8e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "9fc7d91f94a94933bde5ba80e64587de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "a7d240a289084bdfba4724c0efd5ab07": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "bc2b8104b4244d8cacedeb95e800d91c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2f60367b1c8941e2bf71661c33969ae8",
       "placeholder": "​",
       "style": "IPY_MODEL_9fc7d91f94a94933bde5ba80e64587de",
       "tabbable": null,
       "tooltip": null,
       "value": " 52/52 [04:26&lt;00:00,  4.22s/it]"
      }
     },
     "bcc69ec5db1b4aab977807284c9290e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3f8ceda83287475b97608e42f5f6782f",
       "placeholder": "​",
       "style": "IPY_MODEL_3d0b06deaa654b989eece8cde06fa0f8",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "bd096d5d219a467786a85cfe1613fedd": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3865f25c78aa46f29a25d807205281c3",
       "placeholder": "​",
       "style": "IPY_MODEL_9189a076554543aaa6f5ee04e40dbe1b",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "cc3ed8dc4a5c43aca7b62d904865b2fa": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf68b6fe24964ce792aa63827489cb97": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "d1178c6858284f788a80b5f2a14fd0b7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d2ee15001d2244529f7e47d3333c0f8e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e1aae4c55cb64f379e74f15357275628": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_23b1ad9c0f9c46c888da66e85c90eb84",
       "placeholder": "​",
       "style": "IPY_MODEL_cf68b6fe24964ce792aa63827489cb97",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "e89e77133c344fc48c1d62f5a607ec93": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "edc33e82be8f41eba6a18a0ef074ab7a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "fa4bddf2c33241b5bf918054518f128f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "fd9e23198ca1489a9773fda3510bf857": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cc3ed8dc4a5c43aca7b62d904865b2fa",
       "max": 8,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_a7d240a289084bdfba4724c0efd5ab07",
       "tabbable": null,
       "tooltip": null,
       "value": 8
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
